from google.colab import files
# Open the Colab file picker and upload the dataset into the runtime's /content directory
uploaded = files.upload()
Saving used_cars_data.csv to used_cars_data (1).csv
# Libraries to help with reading and manipulating data
import numpy as np
import pandas as pd
#Libraries to help with data visualization
import matplotlib.pyplot as plt
import seaborn as sns
# to split the data into train and test
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
# Removes the limit for the number of displayed columns
pd.set_option("display.max_columns", None)
# Sets the limit for the number of displayed rows
pd.set_option("display.max_rows", 200)
import tensorflow as tf #An end-to-end open source machine learning platform
from tensorflow import keras # High-level neural networks API for deep learning.
from keras import backend # Abstraction layer for neural network backend engines.
from keras.models import Sequential # Model for building NN sequentially.
from keras.layers import Dense
# Set the seed using keras.utils.set_random_seed. This will set:
# 1) `numpy` seed
# 2) backend random seed
# 3) `python` random seed
# NOTE(review): tensorflow/keras are imported twice earlier in this file; harmless but redundant.
keras.utils.set_random_seed(812)
# If using TensorFlow, this will make GPU ops as deterministic as possible,
# but it will affect the overall performance, so be mindful of that.
tf.config.experimental.enable_op_determinism()
# to suppress warnings
import warnings
warnings.filterwarnings("ignore")
# Importing the dataset (placed in /content by the Colab upload step above)
data = pd.read_csv('/content/used_cars_data.csv')
# Check the top five records of the data
data.head()
| Location | Year | Kilometers_Driven | Fuel_Type | Transmission | Owner_Type | Seats | New_Price | Price | mileage_num | engine_num | power_num | Brand | Model | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Mumbai | 2010 | 72000.0 | CNG | Manual | First | 5.0 | 5.51 | 1.75 | 26.60 | 998.0 | 58.16 | maruti | wagon |
| 1 | Pune | 2015 | 41000.0 | Diesel | Manual | First | 5.0 | 16.06 | 12.50 | 19.67 | 1582.0 | 126.20 | hyundai | creta |
| 2 | Chennai | 2011 | 46000.0 | Petrol | Manual | First | 5.0 | 8.61 | 4.50 | 18.20 | 1199.0 | 88.70 | honda | jazz |
| 3 | Chennai | 2012 | 87000.0 | Diesel | Manual | First | 7.0 | 11.27 | 6.00 | 20.77 | 1248.0 | 88.76 | maruti | ertiga |
| 4 | Coimbatore | 2013 | 40670.0 | Diesel | Automatic | Second | 5.0 | 53.14 | 17.74 | 15.20 | 1968.0 | 140.80 | audi | a4 |
# checking shape of the data: (number of rows, number of columns)
print(f"There are {data.shape[0]} rows and {data.shape[1]} columns.")
There are 7252 rows and 14 columns.
# let's view a random sample of the data (fixed random_state for reproducibility)
data.sample(n=10, random_state=1)
| Location | Year | Kilometers_Driven | Fuel_Type | Transmission | Owner_Type | Seats | New_Price | Price | mileage_num | engine_num | power_num | Brand | Model | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2397 | Kolkata | 2016 | 21460.0 | Petrol | Manual | First | 5.0 | 9.470 | 6.00 | 17.00 | 1497.0 | 121.36 | ford | ecosport |
| 6218 | Kolkata | 2013 | 48000.0 | Diesel | Manual | First | 5.0 | 7.880 | NaN | 23.40 | 1248.0 | 74.00 | maruti | swift |
| 6737 | Mumbai | 2015 | 59500.0 | Petrol | Manual | First | 7.0 | 13.580 | NaN | 17.30 | 1497.0 | 117.30 | honda | mobilio |
| 3659 | Delhi | 2015 | 27000.0 | Petrol | Automatic | First | 5.0 | 9.600 | 5.95 | 19.00 | 1199.0 | 88.70 | honda | jazz |
| 4513 | Bangalore | 2015 | 19000.0 | Diesel | Automatic | Second | 5.0 | 69.675 | 38.00 | 16.36 | 2179.0 | 187.70 | jaguar | xf |
| 599 | Coimbatore | 2019 | 40674.0 | Diesel | Automatic | First | 7.0 | 28.050 | 24.82 | 11.36 | 2755.0 | 171.50 | toyota | innova |
| 186 | Bangalore | 2014 | 37382.0 | Diesel | Automatic | First | 5.0 | 86.970 | 32.00 | 13.00 | 2143.0 | 201.10 | mercedes-benz | e-class |
| 305 | Kochi | 2014 | 61726.0 | Diesel | Automatic | First | 5.0 | 67.100 | 20.77 | 17.68 | 1968.0 | 174.33 | audi | a6 |
| 4581 | Hyderabad | 2013 | 105000.0 | Diesel | Automatic | First | 5.0 | 44.800 | 19.00 | 17.32 | 1968.0 | 150.00 | audi | q3 |
| 6616 | Delhi | 2014 | 55000.0 | Diesel | Automatic | First | 5.0 | 49.490 | NaN | 11.78 | 2143.0 | 167.62 | mercedes-benz | new |
# checking column datatypes and number of non-null values
# work on a copy so the raw `data` frame stays untouched
df = data.copy()
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7252 entries, 0 to 7251 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Location 7252 non-null object 1 Year 7252 non-null int64 2 Kilometers_Driven 7251 non-null float64 3 Fuel_Type 7252 non-null object 4 Transmission 7252 non-null object 5 Owner_Type 7252 non-null object 6 Seats 7199 non-null float64 7 New_Price 7252 non-null float64 8 Price 6019 non-null float64 9 mileage_num 7169 non-null float64 10 engine_num 7206 non-null float64 11 power_num 7077 non-null float64 12 Brand 7252 non-null object 13 Model 7252 non-null object dtypes: float64(7), int64(1), object(6) memory usage: 793.3+ KB
6 columns have an object data type and 8 have numerical data types (7 float columns and one integer column).
# checking for duplicate values (rows identical across every column)
df.duplicated().sum()
2
There are two duplicate values in the data. They will be explored.
df[df.duplicated(keep=False) == True]
| Location | Year | Kilometers_Driven | Fuel_Type | Transmission | Owner_Type | Seats | New_Price | Price | mileage_num | engine_num | power_num | Brand | Model | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3623 | Hyderabad | 2007 | 52195.0 | Petrol | Manual | First | 5.0 | 4.36 | 1.75 | 19.7 | 796.0 | 46.3 | maruti | alto |
| 4781 | Hyderabad | 2007 | 52195.0 | Petrol | Manual | First | 5.0 | 4.36 | 1.75 | 19.7 | 796.0 | 46.3 | maruti | alto |
| 6940 | Kolkata | 2017 | 13000.0 | Diesel | Manual | First | 5.0 | 13.58 | NaN | 26.0 | 1498.0 | 98.6 | honda | city |
| 7077 | Kolkata | 2017 | 13000.0 | Diesel | Manual | First | 5.0 | 13.58 | NaN | 26.0 | 1498.0 | 98.6 | honda | city |
There is a good chance that two cars of the same build were sold in the same location, but it is highly unlikely that they would have been driven the same number of kilometers. We will drop the row which occurs second.
# Drop the later occurrence of each duplicated row. drop_duplicates(keep="first")
# removes exactly the same rows the hard-coded drops did (index labels 4781 and
# 7077) but keeps working if the data changes or those labels shift.
df = df.drop_duplicates(keep="first")
# checking for duplicate values
df.duplicated().sum()
0
df.isnull().sum()
| 0 | |
|---|---|
| Location | 0 |
| Year | 0 |
| Kilometers_Driven | 1 |
| Fuel_Type | 0 |
| Transmission | 0 |
| Owner_Type | 0 |
| Seats | 53 |
| New_Price | 0 |
| Price | 1232 |
| mileage_num | 83 |
| engine_num | 46 |
| power_num | 175 |
| Brand | 0 |
| Model | 0 |
We have a lot of missing data from Kilometers driven, Seats, Price, mileage_num, engine_num, power_num which can be treated in data pre-processing.
We will drop the rows where Price is missing as it is the target variable
# function to create labeled barplots
def labeled_barplot(data, feature, perc=False, n=None):
    """
    Barplot with percentage at the top

    data: dataframe
    feature: dataframe column
    perc: whether to display percentages instead of count (default is False)
    n: displays the top n category levels (default is None, i.e., display all levels)
    """
    total = len(data[feature])  # number of observations (denominator for percentages)
    count = data[feature].nunique()
    # scale figure width with the number of bars that will be shown
    if n is None:
        plt.figure(figsize=(count + 1, 5))
    else:
        plt.figure(figsize=(n + 1, 5))

    plt.xticks(rotation=90, fontsize=15)
    ax = sns.countplot(
        data=data,
        x=feature,
        palette="Paired",
        order=data[feature].value_counts().index[:n].sort_values(),
    )

    for p in ax.patches:
        if perc:  # idiomatic truthiness check instead of `== True`
            label = "{:.1f}%".format(
                100 * p.get_height() / total
            )  # percentage of each class of the category
        else:
            # cast to int so count labels render as "1000" instead of "1000.0"
            label = int(p.get_height())  # count of each level of the category

        x = p.get_x() + p.get_width() / 2  # bar center: x position for the label
        y = p.get_height()  # bar top: y position for the label

        ax.annotate(
            label,
            (x, y),
            ha="center",
            va="center",
            size=12,
            xytext=(0, 5),
            textcoords="offset points",
        )  # annotate the count/percentage just above the bar

    plt.show()  # show the plot
# function to plot a boxplot and a histogram along the same scale.
def histogram_boxplot(data, feature, figsize=(12, 7), kde=False, bins=None):
    """
    Boxplot and histogram combined

    data: dataframe
    feature: dataframe column
    figsize: size of figure (default (12,7))
    kde: whether to the show density curve (default False)
    bins: number of bins for histogram (default None)
    """
    f2, (ax_box2, ax_hist2) = plt.subplots(
        nrows=2,  # Number of rows of the subplot grid= 2
        sharex=True,  # x-axis will be shared among all subplots
        gridspec_kw={"height_ratios": (0.25, 0.75)},
        figsize=figsize,
    )  # creating the 2 subplots
    sns.boxplot(
        data=data, x=feature, ax=ax_box2, showmeans=True, color="violet"
    )  # boxplot will be created and a star will indicate the mean value of the column
    # Plain if/else instead of the original conditional expression evaluated purely
    # for its side effects — same behavior, far easier to read and modify.
    if bins:
        sns.histplot(
            data=data, x=feature, kde=kde, ax=ax_hist2, bins=bins, palette="winter"
        )  # histogram with an explicit bin count
    else:
        sns.histplot(data=data, x=feature, kde=kde, ax=ax_hist2)  # default binning
    ax_hist2.axvline(
        data[feature].mean(), color="green", linestyle="--"
    )  # Add mean to the histogram
    ax_hist2.axvline(
        data[feature].median(), color="black", linestyle="-"
    )  # Add median to the histogram
Univariate Data Analysis
# copy for EDA-only derived columns (log transforms) so `df` stays unchanged
df1 = df.copy()
histogram_boxplot(df1, "Kilometers_Driven", bins=100, kde=True)
Highly right-skewed distribution. A log transformation will be used on this column.
# log transform to reduce the heavy right skew seen in the plot above
df1["kilometers_driven_log"] = np.log(df1["Kilometers_Driven"])
histogram_boxplot(df1, "kilometers_driven_log", bins=100, kde=True)
The transformation has made it less skewed
histogram_boxplot(df1, "mileage_num", kde=True)
Close to a normal distribution. It seems like around 50% of cars have a mileage below 18 miles per gallon
histogram_boxplot(df1, "engine_num", kde=True)
There are few cars with higher engine displacement volume
histogram_boxplot(df1, "power_num", kde=True)
There are fewer cars with higher engine power
# creating histograms for all numeric columns at once
df.hist(figsize=(14, 14))
plt.show()
histogram_boxplot(df1, "New_Price", kde=True)
# log transform: New_Price is heavily right-skewed
df1["New_price_log"] = np.log(df1["New_Price"])
histogram_boxplot(df1, "New_price_log", bins=100, kde=True)
BRAND
labeled_barplot(df1, "Brand", perc=True, n=10)
The used cars that are most in demand are maruti and hyundai
# distributions of the remaining categorical features, labelled with percentages
labeled_barplot(df1, "Location", perc=True)
labeled_barplot(df1, "Fuel_Type", perc=True)
labeled_barplot(df1, "Seats", perc=True)
labeled_barplot(df1, "Transmission", perc=True)
labeled_barplot(df1, "Owner_Type", perc=True)
Most used cars sold operated on diesel and petrol. Around 1 percent of cars did not.
# If 'Location' needs to be converted to a numeric representation (e.g., using one-hot encoding):
!pip install scikit-learn
import pandas as pd
# NOTE(review): OneHotEncoder is imported but never used — pd.get_dummies below does the encoding
from sklearn.preprocessing import OneHotEncoder
# Apply one-hot encoding to the categorical columns
df_encoded = pd.get_dummies(df, columns=['Location', 'Fuel_Type', 'Transmission', 'Owner_Type', 'Brand', 'Model'])
# Select the relevant numeric columns, including one-hot encoded features if necessary
numeric_columns = ['Year', 'Kilometers_Driven', 'Seats', 'New_Price', 'Price', 'mileage_num', 'engine_num', 'power_num']
# Create a correlation matrix for the selected numeric columns
correlation_matrix = df_encoded[numeric_columns].corr()
# Plot the correlation matrix as a heatmap
plt.figure(figsize=(15, 7))
sns.heatmap(
    correlation_matrix, annot=True, vmin=-1, vmax=1, fmt=".2f", cmap="Spectral"
)
plt.show()
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.3.2) Requirement already satisfied: numpy<2.0,>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.26.4) Requirement already satisfied: scipy>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.13.1) Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2) Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)
Power and engine are very important predictors of price. The price of a new car of the same model seems to be an important predictor of the used car price, which makes sense.
Price vs Transmission

plt.figure(figsize=(5, 5))
# price spread by transmission type
sns.boxplot(x="Transmission", y="Price", data=df1)
plt.show()
Price vs Brand

plt.figure(figsize=(18, 5))
# price spread by brand; rotate labels so all brand names are readable
sns.boxplot(x="Brand", y="Price", data=df1)
plt.xticks(rotation=90)
plt.show()
Price vs Owner_Type (the Price vs Fuel_Type comparison appears further below)

plt.figure(figsize=(18, 5))
# price spread by owner type
sns.boxplot(x="Owner_Type", y="Price", data=df1)
plt.show()
# pairwise scatter plots of all numeric columns, colored by fuel type
sns.pairplot(data=df, hue="Fuel_Type")
plt.show()
Zooming into these plots gives us a lot of information.
Kilometers_Driven does not seem to have a linear relationship with the price. Price has a positive relationship with Year, i.e., the newer the car, the higher the price.
2 seater cars are all luxury variants. Cars with 8-10 seats are exclusively mid to high range.
New_Price and used car price are also positively correlated, which is expected. Kilometers_Driven has a peculiar relationship with the Year variable: generally, the newer the car, the less distance it has traveled, but this is not always true. The mileage of these cars is very high.

Price vs Fuel_Type

plt.figure(figsize=(18, 5))
# price spread by fuel type
sns.boxplot(x="Fuel_Type", y="Price", data=df1)
plt.show()
Price vs Location
# price spread by location
plt.figure(figsize=(12, 5))
sns.boxplot(x="Location", y="Price", data=df1)
plt.show()
It appears that cars from Bangalore and Coimbatore have higher IQR. The data is more spread out. The remaining locations have low IQRs therefore the middle data is more tightly clustered, indicating less variability in prices.
Price vs Brand
plt.figure(figsize=(18, 5))
# NOTE(review): this plot uses `df` while the other bivariate plots use `df1` — confirm intentional
sns.boxplot(x="Brand", y="Price", data=df)
plt.xticks(rotation=90)
plt.show()
Luxury cars such as bentley and lamborghini do not have much variability in price as seen in their IQR. Budget cars are similar, but their prices are very low. Mid tier cars such as BMW, Mercedes Benz, Jaguars, Land Rovers, and Audi have larger IQRs indicating higher variability in pricing. The stronger the brand the higher the price, premium brands command higher pricing and budget brands command lower pricing.
Price Vs Year
# price spread by model year
plt.figure(figsize=(18, 5))
sns.boxplot(x="Year", y="Price", data=df1)
plt.show()
The price of used cars has increased over the years
Data Pre-processing
Let's drop the NaN in the Price Column
# considering only the data points where price is not missing
# (Price is the target variable, so rows without it cannot be used for modelling)
df = df[df["Price"].notna()].copy()
# checking for missing values remaining after the filter
df.isnull().sum()
| 0 | |
|---|---|
| Location | 0 |
| Year | 0 |
| Kilometers_Driven | 1 |
| Fuel_Type | 0 |
| Transmission | 0 |
| Owner_Type | 0 |
| Seats | 42 |
| New_Price | 0 |
| Price | 0 |
| mileage_num | 70 |
| engine_num | 36 |
| power_num | 143 |
| Brand | 0 |
| Model | 0 |
Encoding the categorical variables
df.dtypes
| 0 | |
|---|---|
| Location | object |
| Year | int64 |
| Kilometers_Driven | float64 |
| Fuel_Type | object |
| Transmission | object |
| Owner_Type | object |
| Seats | float64 |
| New_Price | float64 |
| Price | float64 |
| mileage_num | float64 |
| engine_num | float64 |
| power_num | float64 |
| Brand | object |
| Model | object |
# keep the raw Brand/Model strings for group-based missing-value imputation later
data_car = df[['Brand', 'Model']].copy()
# one-hot encode every object column plus Year (int64, treated as categorical);
# drop_first avoids the dummy-variable trap
df = pd.get_dummies(df,
columns=df.select_dtypes(include=["object","int64"]).columns.tolist(),
drop_first=True,dtype=int
)
# Adding Brand and Model which is stored in data_car variable
# These will be needed during missing value imputation
df_final = pd.concat([df,data_car], axis=1)
df_final.shape
(6018, 287)
df_final.head()
| Kilometers_Driven | Seats | New_Price | Price | mileage_num | engine_num | power_num | Location_Bangalore | Location_Chennai | Location_Coimbatore | Location_Delhi | Location_Hyderabad | Location_Jaipur | Location_Kochi | Location_Kolkata | Location_Mumbai | Location_Pune | Year_1999 | Year_2000 | Year_2001 | Year_2002 | Year_2003 | Year_2004 | Year_2005 | Year_2006 | Year_2007 | Year_2008 | Year_2009 | Year_2010 | Year_2011 | Year_2012 | Year_2013 | Year_2014 | Year_2015 | Year_2016 | Year_2017 | Year_2018 | Year_2019 | Fuel_Type_Diesel | Fuel_Type_Electric | Fuel_Type_LPG | Fuel_Type_Petrol | Transmission_Manual | Owner_Type_Fourth & Above | Owner_Type_Second | Owner_Type_Third | Brand_audi | Brand_bentley | Brand_bmw | Brand_chevrolet | Brand_datsun | Brand_fiat | Brand_force | Brand_ford | Brand_honda | Brand_hyundai | Brand_isuzu | Brand_jaguar | Brand_jeep | Brand_lamborghini | Brand_land | Brand_mahindra | Brand_maruti | Brand_mercedes-benz | Brand_mini | Brand_mitsubishi | Brand_nissan | Brand_porsche | Brand_renault | Brand_skoda | Brand_smart | Brand_tata | Brand_toyota | Brand_volkswagen | Brand_volvo | Model_1000 | Model_3 | Model_5 | Model_6 | Model_7 | Model_800 | Model_a | Model_a-star | Model_a3 | Model_a4 | Model_a6 | Model_a7 | Model_a8 | Model_accent | Model_accord | Model_alto | Model_amaze | Model_ameo | Model_aspire | Model_aveo | Model_avventura | Model_b | Model_baleno | Model_beat | Model_beetle | Model_bolero | Model_bolt | Model_boxster | Model_br-v | Model_brio | Model_brv | Model_c-class | Model_camry | Model_captiva | Model_captur | Model_cayenne | Model_cayman | Model_cedia | Model_celerio | Model_ciaz | Model_city | Model_civic | Model_cla | Model_classic | Model_cls-class | Model_clubman | Model_compass | Model_continental | Model_cooper | Model_corolla | Model_countryman | Model_cr-v | Model_creta | Model_crosspolo | Model_cruze | Model_d-max | Model_duster | Model_dzire | Model_e | Model_e-class | Model_ecosport | Model_eeco | 
Model_elantra | Model_elite | Model_endeavour | Model_enjoy | Model_eon | Model_ertiga | Model_esteem | Model_estilo | Model_etios | Model_evalia | Model_f | Model_fabia | Model_fiesta | Model_figo | Model_fluence | Model_fortuner | Model_fortwo | Model_freestyle | Model_fusion | Model_gallardo | Model_getz | Model_gl-class | Model_gla | Model_glc | Model_gle | Model_gls | Model_go | Model_grand | Model_grande | Model_hexa | Model_i10 | Model_i20 | Model_ignis | Model_ikon | Model_indica | Model_indigo | Model_innova | Model_jazz | Model_jeep | Model_jetta | Model_koleos | Model_kuv | Model_kwid | Model_lancer | Model_laura | Model_linea | Model_lodgy | Model_logan | Model_m-class | Model_manza | Model_micra | Model_mobilio | Model_montero | Model_mustang | Model_mux | Model_nano | Model_new | Model_nexon | Model_nuvosport | Model_octavia | Model_omni | Model_one | Model_optra | Model_outlander | Model_pajero | Model_panamera | Model_passat | Model_petra | Model_platinum | Model_polo | Model_prius | Model_pulse | Model_punto | Model_q3 | Model_q5 | Model_q7 | Model_qualis | Model_quanto | Model_r-class | Model_rapid | Model_redi | Model_redi-go | Model_renault | Model_ritz | Model_rover | Model_rs5 | Model_s | Model_s-class | Model_s-cross | Model_s60 | Model_s80 | Model_safari | Model_sail | Model_santa | Model_santro | Model_scala | Model_scorpio | Model_siena | Model_sl-class | Model_slc | Model_slk-class | Model_sonata | Model_spark | Model_ssangyong | Model_sumo | Model_sunny | Model_superb | Model_swift | Model_sx4 | Model_tavera | Model_teana | Model_terrano | Model_thar | Model_tiago | Model_tigor | Model_tiguan | Model_tt | Model_tucson | Model_tuv | Model_v40 | Model_vento | Model_venture | Model_verito | Model_verna | Model_versa | Model_vitara | Model_wagon | Model_wr-v | Model_wrv | Model_x-trail | Model_x1 | Model_x3 | Model_x5 | Model_x6 | Model_xc60 | Model_xc90 | Model_xcent | Model_xe | Model_xenon | Model_xf | Model_xj | Model_xuv300 | 
Model_xuv500 | Model_xylo | Model_yeti | Model_z4 | Model_zen | Model_zest | Brand | Model | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 72000.0 | 5.0 | 5.51 | 1.75 | 26.60 | 998.0 | 58.16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maruti | wagon |
| 1 | 41000.0 | 5.0 | 16.06 | 12.50 | 19.67 | 1582.0 | 126.20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | hyundai | creta |
| 2 | 46000.0 | 5.0 | 8.61 | 4.50 | 18.20 | 1199.0 | 88.70 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | honda | jazz |
| 3 | 87000.0 | 7.0 | 11.27 | 6.00 | 20.77 | 1248.0 | 88.76 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maruti | ertiga |
| 4 | 40670.0 | 5.0 | 53.14 | 17.74 | 15.20 | 1968.0 | 140.80 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | audi | a4 |
Train Test Split
# defining the dependent and independent variables
X = df_final.drop(["Price"], axis=1)
y = df_final["Price"]
# splitting the data in 80:20 ratio for train and temporary data
x_train, x_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2,random_state=1)
# splitting the temporary data in 50:50 ratio for validation and test data
# (overall split is therefore 80% train / 10% validation / 10% test)
x_val,x_test,y_val,y_test = train_test_split(x_temp,y_temp,test_size=0.5,random_state=1)
print("Number of rows in train data =", x_train.shape[0])
print("Number of rows in validation data =", x_val.shape[0])
print("Number of rows in test data =", x_test.shape[0])
Number of rows in train data = 4814 Number of rows in validation data = 602 Number of rows in test data = 602
def print_missing_values_columns(df):
    """
    Print (and return) the missing-value counts for the columns of df
    that contain at least one missing value.

    Parameters:
    - df: DataFrame
        The DataFrame to check for missing values.

    Returns:
    - Series
        Missing-value counts indexed by column name (empty when the frame
        is complete). Returned so callers can also use the counts
        programmatically instead of only reading the printed output.
    """
    missing_values_columns = df.columns[df.isnull().any()]
    missing_values_counts = df[missing_values_columns].isnull().sum()
    print(missing_values_counts)
    return missing_values_counts
# train data
print_missing_values_columns(x_train)
Kilometers_Driven 1 Seats 39 mileage_num 59 engine_num 34 power_num 116 dtype: int64
# validation data
print_missing_values_columns(x_val)
Seats 1 mileage_num 5 power_num 13 dtype: int64
# test data
print_missing_values_columns(x_test)
Seats 2 mileage_num 6 engine_num 2 power_num 14 dtype: int64
We'll impute these missing values one-by-one by taking the median number of seats for the particular car using the Brand and Model.
# first, we calculate the median of Seats in the train set grouped by Brand and Model and store in train_grouped_median
# NOTE: the statistic is computed on the TRAIN split only so that no information
# from the validation/test sets leaks into the imputation
train_grouped_median = x_train.groupby(["Brand", "Model"])["Seats"].median()
# display the per-(Brand, Model) median Seats mapping
train_grouped_median
| Seats | ||
|---|---|---|
| Brand | Model | |
| ambassador | classic | 5.0 |
| audi | a3 | 5.0 |
| a4 | 5.0 | |
| a6 | 5.0 | |
| a7 | 5.0 | |
| ... | ... | ... |
| volvo | s60 | 5.0 |
| s80 | 5.0 | |
| v40 | 5.0 | |
| xc60 | 5.0 | |
| xc90 | 7.0 |
209 rows × 1 columns
Working of the above code
It groups the training dataset x_train by the columns Brand and Model Within each group, it selects the Seats column Then, it calculates the median of the Seats column for each group This step effectively creates a mapping of the median number of seats for each unique combination of Brand and Model
# fill missing Seats values in the train set from the train-set median of the
# row's (Brand, Model) group; unseen combinations fall back to NaN
x_train["Seats"] = x_train.apply(
    lambda row: train_grouped_median.get((row["Brand"], row["Model"]), np.nan)
    if pd.isna(row["Seats"])
    else row["Seats"],
    axis=1,
)
Working of the above code
For each row in the training dataset x_train:
It checks whether the value in the selected row of the Seats column (row["Seats"]) is not NaN, using not pd.isna(row["Seats"])
If the value is not NaN (i.e., it's not missing), it keeps the original value (row["Seats"])
If the value is NaN (missing), it uses train_grouped_median.get((row["Brand"], row["Model"]), np.nan) to fetch the median value for the corresponding Brand and Model combination from the train_grouped_median mapping created previously
If there's no corresponding median value (i.e., the combination of Brand and Model doesn't exist in train_grouped_median), it assigns NaN (np.nan). This step essentially fills missing values in the Seats column of the training dataset x_train using the median values calculated from the training dataset. It ensures that the imputation is done based on the specific Brand and Model combination, preserving the relationship between these features and the Seats column.
# checking data points where Seats is still missing
# (these rows belong to (Brand, Model) combinations that never appear with a
# non-missing Seats value in the train set, so the group median was NaN)
x_train[x_train["Seats"].isnull()]
| Kilometers_Driven | Seats | New_Price | mileage_num | engine_num | power_num | Location_Bangalore | Location_Chennai | Location_Coimbatore | Location_Delhi | Location_Hyderabad | Location_Jaipur | Location_Kochi | Location_Kolkata | Location_Mumbai | Location_Pune | Year_1999 | Year_2000 | Year_2001 | Year_2002 | Year_2003 | Year_2004 | Year_2005 | Year_2006 | Year_2007 | Year_2008 | Year_2009 | Year_2010 | Year_2011 | Year_2012 | Year_2013 | Year_2014 | Year_2015 | Year_2016 | Year_2017 | Year_2018 | Year_2019 | Fuel_Type_Diesel | Fuel_Type_Electric | Fuel_Type_LPG | Fuel_Type_Petrol | Transmission_Manual | Owner_Type_Fourth & Above | Owner_Type_Second | Owner_Type_Third | Brand_audi | Brand_bentley | Brand_bmw | Brand_chevrolet | Brand_datsun | Brand_fiat | Brand_force | Brand_ford | Brand_honda | Brand_hyundai | Brand_isuzu | Brand_jaguar | Brand_jeep | Brand_lamborghini | Brand_land | Brand_mahindra | Brand_maruti | Brand_mercedes-benz | Brand_mini | Brand_mitsubishi | Brand_nissan | Brand_porsche | Brand_renault | Brand_skoda | Brand_smart | Brand_tata | Brand_toyota | Brand_volkswagen | Brand_volvo | Model_1000 | Model_3 | Model_5 | Model_6 | Model_7 | Model_800 | Model_a | Model_a-star | Model_a3 | Model_a4 | Model_a6 | Model_a7 | Model_a8 | Model_accent | Model_accord | Model_alto | Model_amaze | Model_ameo | Model_aspire | Model_aveo | Model_avventura | Model_b | Model_baleno | Model_beat | Model_beetle | Model_bolero | Model_bolt | Model_boxster | Model_br-v | Model_brio | Model_brv | Model_c-class | Model_camry | Model_captiva | Model_captur | Model_cayenne | Model_cayman | Model_cedia | Model_celerio | Model_ciaz | Model_city | Model_civic | Model_cla | Model_classic | Model_cls-class | Model_clubman | Model_compass | Model_continental | Model_cooper | Model_corolla | Model_countryman | Model_cr-v | Model_creta | Model_crosspolo | Model_cruze | Model_d-max | Model_duster | Model_dzire | Model_e | Model_e-class | Model_ecosport | Model_eeco | 
Model_elantra | Model_elite | Model_endeavour | Model_enjoy | Model_eon | Model_ertiga | Model_esteem | Model_estilo | Model_etios | Model_evalia | Model_f | Model_fabia | Model_fiesta | Model_figo | Model_fluence | Model_fortuner | Model_fortwo | Model_freestyle | Model_fusion | Model_gallardo | Model_getz | Model_gl-class | Model_gla | Model_glc | Model_gle | Model_gls | Model_go | Model_grand | Model_grande | Model_hexa | Model_i10 | Model_i20 | Model_ignis | Model_ikon | Model_indica | Model_indigo | Model_innova | Model_jazz | Model_jeep | Model_jetta | Model_koleos | Model_kuv | Model_kwid | Model_lancer | Model_laura | Model_linea | Model_lodgy | Model_logan | Model_m-class | Model_manza | Model_micra | Model_mobilio | Model_montero | Model_mustang | Model_mux | Model_nano | Model_new | Model_nexon | Model_nuvosport | Model_octavia | Model_omni | Model_one | Model_optra | Model_outlander | Model_pajero | Model_panamera | Model_passat | Model_petra | Model_platinum | Model_polo | Model_prius | Model_pulse | Model_punto | Model_q3 | Model_q5 | Model_q7 | Model_qualis | Model_quanto | Model_r-class | Model_rapid | Model_redi | Model_redi-go | Model_renault | Model_ritz | Model_rover | Model_rs5 | Model_s | Model_s-class | Model_s-cross | Model_s60 | Model_s80 | Model_safari | Model_sail | Model_santa | Model_santro | Model_scala | Model_scorpio | Model_siena | Model_sl-class | Model_slc | Model_slk-class | Model_sonata | Model_spark | Model_ssangyong | Model_sumo | Model_sunny | Model_superb | Model_swift | Model_sx4 | Model_tavera | Model_teana | Model_terrano | Model_thar | Model_tiago | Model_tigor | Model_tiguan | Model_tt | Model_tucson | Model_tuv | Model_v40 | Model_vento | Model_venture | Model_verito | Model_verna | Model_versa | Model_vitara | Model_wagon | Model_wr-v | Model_wrv | Model_x-trail | Model_x1 | Model_x3 | Model_x5 | Model_x6 | Model_xc60 | Model_xc90 | Model_xcent | Model_xe | Model_xenon | Model_xf | Model_xj | Model_xuv300 | 
Model_xuv500 | Model_xylo | Model_yeti | Model_z4 | Model_zen | Model_zest | Brand | Model | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2369 | 56000.0 | NaN | 7.88 | 19.5 | 1061.0 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maruti | estilo |
| 5893 | 51000.0 | NaN | 7.88 | 19.5 | 1061.0 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maruti | estilo |
Maruti Estilo can accommodate 5 people.
# the remaining rows are Maruti Estilo, which seats 5, so fill them with 5.0
x_train["Seats"] = x_train["Seats"].fillna(5.0)
# fill missing Seats in the validation set using the SAME train-set group
# medians (the statistic is never re-fit on validation data)
x_val["Seats"] = x_val.apply(
    lambda row: train_grouped_median.get((row["Brand"], row["Model"]), np.nan)
    if pd.isna(row["Seats"])
    else row["Seats"],
    axis=1,
)
The above code does the same operation as the one previously used for imputing missing values The only difference is that it operates on the validation set (x_val) instead of the training set (x_train)
# checking the missing values in x_val
print_missing_values_columns(x_val)
Seats 1 mileage_num 5 power_num 13 dtype: int64
# checking data points where Seats is still missing
x_val[x_val["Seats"].isnull()]
| Kilometers_Driven | Seats | New_Price | mileage_num | engine_num | power_num | Location_Bangalore | Location_Chennai | Location_Coimbatore | Location_Delhi | Location_Hyderabad | Location_Jaipur | Location_Kochi | Location_Kolkata | Location_Mumbai | Location_Pune | Year_1999 | Year_2000 | Year_2001 | Year_2002 | Year_2003 | Year_2004 | Year_2005 | Year_2006 | Year_2007 | Year_2008 | Year_2009 | Year_2010 | Year_2011 | Year_2012 | Year_2013 | Year_2014 | Year_2015 | Year_2016 | Year_2017 | Year_2018 | Year_2019 | Fuel_Type_Diesel | Fuel_Type_Electric | Fuel_Type_LPG | Fuel_Type_Petrol | Transmission_Manual | Owner_Type_Fourth & Above | Owner_Type_Second | Owner_Type_Third | Brand_audi | Brand_bentley | Brand_bmw | Brand_chevrolet | Brand_datsun | Brand_fiat | Brand_force | Brand_ford | Brand_honda | Brand_hyundai | Brand_isuzu | Brand_jaguar | Brand_jeep | Brand_lamborghini | Brand_land | Brand_mahindra | Brand_maruti | Brand_mercedes-benz | Brand_mini | Brand_mitsubishi | Brand_nissan | Brand_porsche | Brand_renault | Brand_skoda | Brand_smart | Brand_tata | Brand_toyota | Brand_volkswagen | Brand_volvo | Model_1000 | Model_3 | Model_5 | Model_6 | Model_7 | Model_800 | Model_a | Model_a-star | Model_a3 | Model_a4 | Model_a6 | Model_a7 | Model_a8 | Model_accent | Model_accord | Model_alto | Model_amaze | Model_ameo | Model_aspire | Model_aveo | Model_avventura | Model_b | Model_baleno | Model_beat | Model_beetle | Model_bolero | Model_bolt | Model_boxster | Model_br-v | Model_brio | Model_brv | Model_c-class | Model_camry | Model_captiva | Model_captur | Model_cayenne | Model_cayman | Model_cedia | Model_celerio | Model_ciaz | Model_city | Model_civic | Model_cla | Model_classic | Model_cls-class | Model_clubman | Model_compass | Model_continental | Model_cooper | Model_corolla | Model_countryman | Model_cr-v | Model_creta | Model_crosspolo | Model_cruze | Model_d-max | Model_duster | Model_dzire | Model_e | Model_e-class | Model_ecosport | Model_eeco | 
Model_elantra | Model_elite | Model_endeavour | Model_enjoy | Model_eon | Model_ertiga | Model_esteem | Model_estilo | Model_etios | Model_evalia | Model_f | Model_fabia | Model_fiesta | Model_figo | Model_fluence | Model_fortuner | Model_fortwo | Model_freestyle | Model_fusion | Model_gallardo | Model_getz | Model_gl-class | Model_gla | Model_glc | Model_gle | Model_gls | Model_go | Model_grand | Model_grande | Model_hexa | Model_i10 | Model_i20 | Model_ignis | Model_ikon | Model_indica | Model_indigo | Model_innova | Model_jazz | Model_jeep | Model_jetta | Model_koleos | Model_kuv | Model_kwid | Model_lancer | Model_laura | Model_linea | Model_lodgy | Model_logan | Model_m-class | Model_manza | Model_micra | Model_mobilio | Model_montero | Model_mustang | Model_mux | Model_nano | Model_new | Model_nexon | Model_nuvosport | Model_octavia | Model_omni | Model_one | Model_optra | Model_outlander | Model_pajero | Model_panamera | Model_passat | Model_petra | Model_platinum | Model_polo | Model_prius | Model_pulse | Model_punto | Model_q3 | Model_q5 | Model_q7 | Model_qualis | Model_quanto | Model_r-class | Model_rapid | Model_redi | Model_redi-go | Model_renault | Model_ritz | Model_rover | Model_rs5 | Model_s | Model_s-class | Model_s-cross | Model_s60 | Model_s80 | Model_safari | Model_sail | Model_santa | Model_santro | Model_scala | Model_scorpio | Model_siena | Model_sl-class | Model_slc | Model_slk-class | Model_sonata | Model_spark | Model_ssangyong | Model_sumo | Model_sunny | Model_superb | Model_swift | Model_sx4 | Model_tavera | Model_teana | Model_terrano | Model_thar | Model_tiago | Model_tigor | Model_tiguan | Model_tt | Model_tucson | Model_tuv | Model_v40 | Model_vento | Model_venture | Model_verito | Model_verna | Model_versa | Model_vitara | Model_wagon | Model_wr-v | Model_wrv | Model_x-trail | Model_x1 | Model_x3 | Model_x5 | Model_x6 | Model_xc60 | Model_xc90 | Model_xcent | Model_xe | Model_xenon | Model_xf | Model_xj | Model_xuv300 | 
Model_xuv500 | Model_xylo | Model_yeti | Model_z4 | Model_zen | Model_zest | Brand | Model | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3882 | 40000.0 | NaN | 7.88 | 19.5 | 1061.0 | NaN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maruti | estilo |
Maruti Estilo can accommodate 5 people.
# the remaining row is a Maruti Estilo, which seats 5, so fill it with 5.0
x_val["Seats"] = x_val["Seats"].fillna(5.0)
# checking the missing values in x_val
print_missing_values_columns(x_val)
mileage_num 5 power_num 13 dtype: int64
# Same method is applied on test data: fill missing Seats from the
# train-set (Brand, Model) group medians
x_test["Seats"] = x_test.apply(
    lambda row: train_grouped_median.get((row["Brand"], row["Model"]), np.nan)
    if pd.isna(row["Seats"])
    else row["Seats"],
    axis=1,
)
# checking the missing values in x_test
print_missing_values_columns(x_test)
mileage_num 6 engine_num 2 power_num 14 dtype: int64
We will use a similar method to fill missing values for the Kilometers_Driven, mileage_num, engine_num, and power_num columns.
cols_list = ["Kilometers_Driven", "mileage_num", "engine_num", "power_num"]
# Step 1: per-(Brand, Model) medians of the selected columns, computed on the train set only
train_grouped_median = x_train.groupby(["Brand", "Model"])[cols_list].median()
# Step 2: fill missing values in each split from the corresponding train-set group median
# (the same logic is applied to train, validation, and test frames)
for col in cols_list:
    for frame in (x_train, x_val, x_test):
        frame[col] = frame.apply(
            lambda row: row[col]
            if not pd.isna(row[col])
            else train_grouped_median[col].get((row["Brand"], row["Model"]), np.nan),
            axis=1,
        )
# checking the missing values in x_train
print_missing_values_columns(x_train)
mileage_num 7 power_num 9 dtype: int64
# checking the missing values in x_val
print_missing_values_columns(x_val)
mileage_num 1 power_num 1 dtype: int64
# checking the missing values in x_test
print_missing_values_columns(x_test)
mileage_num 1 power_num 1 dtype: int64
cols_list = ["mileage_num", "power_num"]
# Step 1: per-Brand medians on the train set — a coarser fallback for the
# (Brand, Model) combinations whose group median was itself NaN
train_grouped_median = x_train.groupby(["Brand"])[cols_list].median()
# Step 2: fill the remaining gaps in each split from the train-set brand median
for col in cols_list:
    for frame in (x_train, x_val, x_test):
        frame[col] = frame.apply(
            lambda row: row[col]
            if not pd.isna(row[col])
            else train_grouped_median[col].get(row["Brand"], np.nan),
            axis=1,
        )
print_missing_values_columns(x_train)
mileage_num 1 power_num 1 dtype: int64
print_missing_values_columns(x_val)
Series([], dtype: float64)
print_missing_values_columns(x_test)
Series([], dtype: float64)
Only one missing value each remains in the train data (in mileage_num and power_num), and all missing values in the validation and test data are imputed.

cols_list = ["mileage_num", "power_num"]
for col in cols_list:
    # Final fallback: fill the last remaining gaps with the TRAIN-set median.
    # The original used df[col].median() — `df` is not defined in this section
    # (the dataset is loaded as `data` and engineered as `df_final`), and a
    # full-dataset median would leak validation/test information into the
    # imputation. The train-split median is the consistent, leak-free statistic.
    x_train[col] = x_train[col].fillna(x_train[col].median())
print_missing_values_columns(x_train)
Series([], dtype: float64)
# Brand and Model are redundant now that one-hot dummy columns exist for them,
# so remove the raw categorical columns from every split
redundant_cols = ["Brand", "Model"]
x_train = x_train.drop(columns=redundant_cols)
x_val = x_val.drop(columns=redundant_cols)
x_test = x_test.drop(columns=redundant_cols)
# Continuous numeric columns that need standardization
num_columns = ["Kilometers_Driven", "Seats", "New_Price", "mileage_num", "engine_num", "power_num"]
# Fit a StandardScaler on the TRAIN split only; its learned mean/std will be
# reused to transform the validation and test splits (no leakage)
scaler = StandardScaler().fit(x_train[num_columns])
StandardScaler()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
StandardScaler()
Once the scaler object fits on the data using the fit() method, it stores the parameters (mean and standard deviation) for normalization based on the training data
We then use these parameters to normalize the validation and test data
This is similar to what we did in the Missing Value Treatment section.
# Apply the train-fitted scaler to every split so all three share the exact
# same normalization parameters
for frame in (x_train, x_val, x_test):
    frame[num_columns] = scaler.transform(frame[num_columns])
# sanity-check the scaled training data
x_train.head()
| Kilometers_Driven | Seats | New_Price | mileage_num | engine_num | power_num | Location_Bangalore | Location_Chennai | Location_Coimbatore | Location_Delhi | Location_Hyderabad | Location_Jaipur | Location_Kochi | Location_Kolkata | Location_Mumbai | Location_Pune | Year_1999 | Year_2000 | Year_2001 | Year_2002 | Year_2003 | Year_2004 | Year_2005 | Year_2006 | Year_2007 | Year_2008 | Year_2009 | Year_2010 | Year_2011 | Year_2012 | Year_2013 | Year_2014 | Year_2015 | Year_2016 | Year_2017 | Year_2018 | Year_2019 | Fuel_Type_Diesel | Fuel_Type_Electric | Fuel_Type_LPG | Fuel_Type_Petrol | Transmission_Manual | Owner_Type_Fourth & Above | Owner_Type_Second | Owner_Type_Third | Brand_audi | Brand_bentley | Brand_bmw | Brand_chevrolet | Brand_datsun | Brand_fiat | Brand_force | Brand_ford | Brand_honda | Brand_hyundai | Brand_isuzu | Brand_jaguar | Brand_jeep | Brand_lamborghini | Brand_land | Brand_mahindra | Brand_maruti | Brand_mercedes-benz | Brand_mini | Brand_mitsubishi | Brand_nissan | Brand_porsche | Brand_renault | Brand_skoda | Brand_smart | Brand_tata | Brand_toyota | Brand_volkswagen | Brand_volvo | Model_1000 | Model_3 | Model_5 | Model_6 | Model_7 | Model_800 | Model_a | Model_a-star | Model_a3 | Model_a4 | Model_a6 | Model_a7 | Model_a8 | Model_accent | Model_accord | Model_alto | Model_amaze | Model_ameo | Model_aspire | Model_aveo | Model_avventura | Model_b | Model_baleno | Model_beat | Model_beetle | Model_bolero | Model_bolt | Model_boxster | Model_br-v | Model_brio | Model_brv | Model_c-class | Model_camry | Model_captiva | Model_captur | Model_cayenne | Model_cayman | Model_cedia | Model_celerio | Model_ciaz | Model_city | Model_civic | Model_cla | Model_classic | Model_cls-class | Model_clubman | Model_compass | Model_continental | Model_cooper | Model_corolla | Model_countryman | Model_cr-v | Model_creta | Model_crosspolo | Model_cruze | Model_d-max | Model_duster | Model_dzire | Model_e | Model_e-class | Model_ecosport | Model_eeco | 
Model_elantra | Model_elite | Model_endeavour | Model_enjoy | Model_eon | Model_ertiga | Model_esteem | Model_estilo | Model_etios | Model_evalia | Model_f | Model_fabia | Model_fiesta | Model_figo | Model_fluence | Model_fortuner | Model_fortwo | Model_freestyle | Model_fusion | Model_gallardo | Model_getz | Model_gl-class | Model_gla | Model_glc | Model_gle | Model_gls | Model_go | Model_grand | Model_grande | Model_hexa | Model_i10 | Model_i20 | Model_ignis | Model_ikon | Model_indica | Model_indigo | Model_innova | Model_jazz | Model_jeep | Model_jetta | Model_koleos | Model_kuv | Model_kwid | Model_lancer | Model_laura | Model_linea | Model_lodgy | Model_logan | Model_m-class | Model_manza | Model_micra | Model_mobilio | Model_montero | Model_mustang | Model_mux | Model_nano | Model_new | Model_nexon | Model_nuvosport | Model_octavia | Model_omni | Model_one | Model_optra | Model_outlander | Model_pajero | Model_panamera | Model_passat | Model_petra | Model_platinum | Model_polo | Model_prius | Model_pulse | Model_punto | Model_q3 | Model_q5 | Model_q7 | Model_qualis | Model_quanto | Model_r-class | Model_rapid | Model_redi | Model_redi-go | Model_renault | Model_ritz | Model_rover | Model_rs5 | Model_s | Model_s-class | Model_s-cross | Model_s60 | Model_s80 | Model_safari | Model_sail | Model_santa | Model_santro | Model_scala | Model_scorpio | Model_siena | Model_sl-class | Model_slc | Model_slk-class | Model_sonata | Model_spark | Model_ssangyong | Model_sumo | Model_sunny | Model_superb | Model_swift | Model_sx4 | Model_tavera | Model_teana | Model_terrano | Model_thar | Model_tiago | Model_tigor | Model_tiguan | Model_tt | Model_tucson | Model_tuv | Model_v40 | Model_vento | Model_venture | Model_verito | Model_verna | Model_versa | Model_vitara | Model_wagon | Model_wr-v | Model_wrv | Model_x-trail | Model_x1 | Model_x3 | Model_x5 | Model_x6 | Model_xc60 | Model_xc90 | Model_xcent | Model_xe | Model_xenon | Model_xf | Model_xj | Model_xuv300 | 
Model_xuv500 | Model_xylo | Model_yeti | Model_z4 | Model_zen | Model_zest | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4269 | -0.694078 | -0.351313 | -0.637638 | 1.136662 | -1.034356 | -0.841807 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2025 | -0.081329 | 2.126668 | -0.674075 | -0.765611 | -0.708133 | -0.731916 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 5776 | -0.469629 | -0.351313 | 1.297640 | -0.287665 | 0.563805 | 1.136412 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1710 | -0.365282 | -0.351313 | -0.517681 | 0.732429 | -0.706486 | -0.545692 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2363 | -0.978527 | -0.351313 | -0.572951 | 0.137969 | -0.706486 | -0.565973 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
def plot(history, name):
    """
    Plot the training and validation curves for a loss or metric.

    history: the History object returned by model.fit(); its .history dict
             maps series names to per-epoch values.
    name: key to plot, e.g. 'loss' or 'r2_score'; the matching
          'val_'+name series is drawn alongside it.
    """
    # Constrained layout is required for the "outside ..." legend locations
    # (matplotlib >= 3.7); without it the figure-level legend is given no
    # room of its own and can be clipped. Using the Axes API throughout
    # (instead of mixing in plt.* state-machine calls) keeps the function
    # correct even when this figure is not the "current" pyplot figure.
    fig, ax = plt.subplots(layout="constrained")
    ax.plot(history.history[name], label='Train')                 # training curve
    ax.plot(history.history['val_' + name], label='Validation')   # validation curve
    ax.set_title('Model ' + name.capitalize())
    ax.set_ylabel(name.capitalize())
    ax.set_xlabel('Epoch')
    fig.legend(loc="outside right upper")  # legend placed to the right of the axes
We'll create a dataframe to store the results from all the models we build.
# function to compute adjusted R-squared
def adj_r2_score(predictors, targets, predictions):
    """Return adjusted R-squared: R2 penalized for the number of predictors."""
    n_obs = predictors.shape[0]       # number of observations
    n_features = predictors.shape[1]  # number of predictors
    raw_r2 = r2_score(targets, predictions)
    penalty = (n_obs - 1) / (n_obs - n_features - 1)
    return 1 - (1 - raw_r2) * penalty
# function to compute MAPE (mean absolute percentage error)
def mape_score(targets, predictions):
    """Return MAPE in percent. NOTE: a zero target yields inf/nan."""
    relative_errors = np.abs(targets - predictions) / targets
    return 100 * relative_errors.mean()
# function to compute different metrics to check performance of a neural network model
def model_performance(model, predictors, target):
    """
    Compute regression metrics for a fitted model.

    model: regressor exposing .predict()
    predictors: independent variables
    target: dependent variable

    Returns a dict mapping metric names to single-element lists (a shape
    that a caller can hand directly to the pandas DataFrame constructor).
    """
    # Keras predict() returns an (n, 1) array; flatten it to (n,) so the
    # sklearn metric functions broadcast correctly against `target`.
    pred = model.predict(predictors).reshape(-1)
    return {
        "RMSE": [np.sqrt(mean_squared_error(target, pred))],   # root mean squared error
        "MAE": [mean_absolute_error(target, pred)],            # mean absolute error
        "R-squared": [r2_score(target, pred)],
        "Adj. R-squared": [adj_r2_score(predictors, target, pred)],
        "MAPE": [mape_score(target, pred)],                    # mean absolute % error
    }
# One row per experiment: architecture/hyperparameters plus the final-epoch
# train/validation loss and R-squared.
columns = [
    "# hidden layers",
    "# neurons - hidden layer",
    "activation function - hidden layer ",  # trailing space kept as-is for compatibility
    "# epochs",
    "batch size",
    "optimizer",
    "time(secs)",
    "Train_loss",
    "Valid_loss",
    "Train_R-squared",
    "Valid_R-squared",
]
results = pd.DataFrame(columns=columns)
We'll use $R^2$ as the metric of choice for evaluating and comparing the models we train.
# Defining the list of metrics to be used for all the models.
# R-squared is tracked under the key "r2_score" in each model's history.
r2_metric = tf.keras.metrics.R2Score(name="r2_score")
metrics = [r2_metric]
# ---- Model 0: no hidden layers -- a single Dense(1) output, equivalent to
# linear regression on the input features ----
# clears the current Keras session, resetting all layers and models previously created, freeing up memory and resources.
tf.keras.backend.clear_session()
#Initializing the neural network
model = Sequential()
# NOTE(review): `input_dim` is a legacy Keras argument; `input_shape=(n,)` is
# the modern spelling -- confirm it is still accepted by the installed version.
model.add(Dense(1,input_dim=x_train.shape[1]))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 1) │ 285 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 285 (1.11 KB)
Trainable params: 285 (1.11 KB)
Non-trainable params: 0 (0.00 B)
optimizer = keras.optimizers.SGD() # defining SGD as the optimizer to be used
# run_eagerly=True disables graph compilation: every batch runs in eager mode
# (slower, but easier to debug).
model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=metrics,run_eagerly=True)
epochs = 10
# Batch size equal to the full training set -> one weight update per epoch,
# i.e. full-batch gradient descent.
batch_size = x_train.shape[0]
import time # imports the time module
start = time.time()
history = model.fit(x_train, y_train, validation_data=(x_val,y_val) , batch_size=batch_size, epochs=epochs)
end=time.time()
Epoch 1/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 330ms/step - loss: 215.6193 - r2_score: -0.7024 - val_loss: 231.8262 - val_r2_score: -0.6387 Epoch 2/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 126ms/step - loss: 199.6886 - r2_score: -0.5766 - val_loss: 215.1222 - val_r2_score: -0.5207 Epoch 3/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 98ms/step - loss: 185.3213 - r2_score: -0.4632 - val_loss: 200.0059 - val_r2_score: -0.4138 Epoch 4/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 96ms/step - loss: 172.3545 - r2_score: -0.3608 - val_loss: 186.3157 - val_r2_score: -0.3170 Epoch 5/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 135ms/step - loss: 160.6429 - r2_score: -0.2683 - val_loss: 173.9070 - val_r2_score: -0.2293 Epoch 6/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 126ms/step - loss: 150.0572 - r2_score: -0.1848 - val_loss: 162.6510 - val_r2_score: -0.1498 Epoch 7/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 134ms/step - loss: 140.4819 - r2_score: -0.1092 - val_loss: 152.4325 - val_r2_score: -0.0775 Epoch 8/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 138ms/step - loss: 131.8141 - r2_score: -0.0407 - val_loss: 143.1481 - val_r2_score: -0.0119 Epoch 9/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 97ms/step - loss: 123.9616 - r2_score: 0.0213 - val_loss: 134.7058 - val_r2_score: 0.0478 Epoch 10/10 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 191ms/step - loss: 116.8424 - r2_score: 0.0775 - val_loss: 127.0229 - val_r2_score: 0.1021
# Wall-clock training time for this run.
print("Time taken in seconds ",end-start)
Time taken in seconds 1.6971805095672607
plot(history,'loss')
plot(history,'r2_score')
# Record this run ('-' marks "no hidden layer") with its final-epoch metrics;
# 'GD' labels the full-batch gradient-descent configuration.
results.loc[0]=['-','-','-',epochs,batch_size,'GD',(end-start),history.history["loss"][-1],history.history["val_loss"][-1],history.history["r2_score"][-1],history.history["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
# ---- Model 1: same linear model as before, trained longer (25 epochs) ----
# clears the current Keras session, resetting all layers and models previously created, freeing up memory and resources.
tf.keras.backend.clear_session()
#Initializing the neural network
model = Sequential()
model.add(Dense(1,input_dim=x_train.shape[1]))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 1) │ 285 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 285 (1.11 KB)
Trainable params: 285 (1.11 KB)
Non-trainable params: 0 (0.00 B)
optimizer = keras.optimizers.SGD() # defining SGD as the optimizer to be used
model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=metrics,run_eagerly=True)
# More epochs than the previous run; still full-batch gradient descent.
epochs = 25
batch_size = x_train.shape[0]
start = time.time()
history = model.fit(x_train, y_train, validation_data=(x_val,y_val) , batch_size=batch_size, epochs=epochs)
end=time.time()
Epoch 1/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 240ms/step - loss: 212.9668 - r2_score: -0.5842 - val_loss: 229.4064 - val_r2_score: -0.6216 Epoch 2/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 179ms/step - loss: 197.4542 - r2_score: -0.5590 - val_loss: 213.0991 - val_r2_score: -0.5064 Epoch 3/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 103ms/step - loss: 183.4492 - r2_score: -0.4484 - val_loss: 198.3264 - val_r2_score: -0.4019 Epoch 4/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 124ms/step - loss: 170.7959 - r2_score: -0.3485 - val_loss: 184.9332 - val_r2_score: -0.3073 Epoch 5/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 99ms/step - loss: 159.3555 - r2_score: -0.2582 - val_loss: 172.7811 - val_r2_score: -0.2214 Epoch 6/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 129ms/step - loss: 149.0038 - r2_score: -0.1764 - val_loss: 161.7463 - val_r2_score: -0.1434 Epoch 7/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 152ms/step - loss: 139.6302 - r2_score: -0.1024 - val_loss: 151.7180 - val_r2_score: -0.0725 Epoch 8/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 95ms/step - loss: 131.1358 - r2_score: -0.0354 - val_loss: 142.5970 - val_r2_score: -0.0080 Epoch 9/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 162ms/step - loss: 123.4324 - r2_score: 0.0255 - val_loss: 134.2945 - val_r2_score: 0.0507 Epoch 10/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 93ms/step - loss: 116.4407 - r2_score: 0.0807 - val_loss: 126.7310 - val_r2_score: 0.1042 Epoch 11/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 137ms/step - loss: 110.0903 - r2_score: 0.1308 - val_loss: 119.8349 - val_r2_score: 0.1529 Epoch 12/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 144ms/step - loss: 104.3176 - r2_score: 0.1764 - val_loss: 113.5422 - val_r2_score: 0.1974 Epoch 13/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 104ms/step - loss: 99.0660 - r2_score: 0.2178 - val_loss: 107.7954 - val_r2_score: 0.2380 Epoch 14/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 94.2847 - r2_score: 0.2556 - val_loss: 102.5428 - val_r2_score: 0.2751 Epoch 15/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 95ms/step - loss: 89.9281 - r2_score: 0.2900 - val_loss: 97.7379 - val_r2_score: 0.3091 Epoch 16/25 1/1 
━━━━━━━━━━━━━━━━━━━━ 0s 184ms/step - loss: 85.9551 - r2_score: 0.3214 - val_loss: 93.3388 - val_r2_score: 0.3402 Epoch 17/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 244ms/step - loss: 82.3291 - r2_score: 0.3500 - val_loss: 89.3079 - val_r2_score: 0.3687 Epoch 18/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 138ms/step - loss: 79.0171 - r2_score: 0.3761 - val_loss: 85.6112 - val_r2_score: 0.3948 Epoch 19/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 169ms/step - loss: 75.9892 - r2_score: 0.4000 - val_loss: 82.2183 - val_r2_score: 0.4188 Epoch 20/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 250ms/step - loss: 73.2189 - r2_score: 0.4219 - val_loss: 79.1014 - val_r2_score: 0.4408 Epoch 21/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 145ms/step - loss: 70.6820 - r2_score: 0.4419 - val_loss: 76.2357 - val_r2_score: 0.4611 Epoch 22/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 106ms/step - loss: 68.3569 - r2_score: 0.4603 - val_loss: 73.5987 - val_r2_score: 0.4797 Epoch 23/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 96ms/step - loss: 66.2241 - r2_score: 0.4771 - val_loss: 71.1701 - val_r2_score: 0.4969 Epoch 24/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 91ms/step - loss: 64.2660 - r2_score: 0.4926 - val_loss: 68.9315 - val_r2_score: 0.5127 Epoch 25/25 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 141ms/step - loss: 62.4666 - r2_score: 0.5068 - val_loss: 66.8663 - val_r2_score: 0.5273
# Wall-clock training time for this run.
print("Time taken in seconds ",end-start)
Time taken in seconds 3.8399524688720703
plot(history,'loss')
plot(history,'r2_score')
# Append this run's final-epoch metrics as row 1 of the comparison table.
results.loc[1]=['-','-','-',epochs,batch_size,'GD',(end-start),history.history["loss"][-1],history.history["val_loss"][-1],history.history["r2_score"][-1],history.history["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
# ---- Model 2: linear model again, now with mini-batch SGD (batch size 32) ----
# clears the current Keras session, resetting all layers and models previously created, freeing up memory and resources.
tf.keras.backend.clear_session()
#Initializing the neural network
model = Sequential()
model.add(Dense(1,input_dim=x_train.shape[1]))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 1) │ 285 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 285 (1.11 KB)
Trainable params: 285 (1.11 KB)
Non-trainable params: 0 (0.00 B)
optimizer = keras.optimizers.SGD() # defining SGD as the optimizer to be used
model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=metrics,run_eagerly=True)
epochs = 25
# Mini-batches of 32 -> many weight updates per epoch (stochastic GD).
batch_size = 32
start = time.time()
history = model.fit(x_train, y_train, validation_data=(x_val,y_val) , batch_size=batch_size, epochs=epochs)
end=time.time()
Epoch 1/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 86.8465 - r2_score: 0.4432 - val_loss: 35.5213 - val_r2_score: 0.7489 Epoch 2/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 23ms/step - loss: 33.6671 - r2_score: 0.7365 - val_loss: 32.9409 - val_r2_score: 0.7671 Epoch 3/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 3s 23ms/step - loss: 31.4131 - r2_score: 0.7541 - val_loss: 31.3458 - val_r2_score: 0.7784 Epoch 4/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 6s 30ms/step - loss: 30.0467 - r2_score: 0.7648 - val_loss: 30.2675 - val_r2_score: 0.7860 Epoch 5/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 22ms/step - loss: 29.1110 - r2_score: 0.7721 - val_loss: 29.4811 - val_r2_score: 0.7916 Epoch 6/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 5s 23ms/step - loss: 28.4153 - r2_score: 0.7775 - val_loss: 28.8686 - val_r2_score: 0.7959 Epoch 7/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 5s 30ms/step - loss: 27.8644 - r2_score: 0.7818 - val_loss: 28.3658 - val_r2_score: 0.7995 Epoch 8/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 5s 27ms/step - loss: 27.4077 - r2_score: 0.7854 - val_loss: 27.9365 - val_r2_score: 0.8025 Epoch 9/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 23ms/step - loss: 27.0161 - r2_score: 0.7884 - val_loss: 27.5594 - val_r2_score: 0.8052 Epoch 10/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 30ms/step - loss: 26.6721 - r2_score: 0.7911 - val_loss: 27.2215 - val_r2_score: 0.8076 Epoch 11/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 23ms/step - loss: 26.3648 - r2_score: 0.7935 - val_loss: 26.9147 - val_r2_score: 0.8097 Epoch 12/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 6s 32ms/step - loss: 26.0867 - r2_score: 0.7957 - val_loss: 26.6333 - val_r2_score: 0.8117 Epoch 13/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 5s 28ms/step - loss: 25.8324 - r2_score: 0.7976 - val_loss: 26.3735 - val_r2_score: 0.8136 Epoch 14/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 23ms/step - loss: 25.5982 - r2_score: 0.7995 - val_loss: 26.1323 - val_r2_score: 0.8153 Epoch 15/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 8s 39ms/step - loss: 25.3811 - r2_score: 0.8012 - val_loss: 25.9075 - val_r2_score: 0.8169 Epoch 16/25 151/151 
━━━━━━━━━━━━━━━━━━━━ 3s 23ms/step - loss: 25.1790 - r2_score: 0.8027 - val_loss: 25.6974 - val_r2_score: 0.8183 Epoch 17/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 5s 23ms/step - loss: 24.9900 - r2_score: 0.8042 - val_loss: 25.5004 - val_r2_score: 0.8197 Epoch 18/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 6s 28ms/step - loss: 24.8125 - r2_score: 0.8056 - val_loss: 25.3153 - val_r2_score: 0.8210 Epoch 19/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 3s 23ms/step - loss: 24.6455 - r2_score: 0.8069 - val_loss: 25.1410 - val_r2_score: 0.8223 Epoch 20/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 6s 26ms/step - loss: 24.4877 - r2_score: 0.8081 - val_loss: 24.9766 - val_r2_score: 0.8234 Epoch 21/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 24.3385 - r2_score: 0.8093 - val_loss: 24.8212 - val_r2_score: 0.8245 Epoch 22/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 23ms/step - loss: 24.1969 - r2_score: 0.8104 - val_loss: 24.6741 - val_r2_score: 0.8256 Epoch 23/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 6s 27ms/step - loss: 24.0624 - r2_score: 0.8114 - val_loss: 24.5347 - val_r2_score: 0.8266 Epoch 24/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 4s 23ms/step - loss: 23.9343 - r2_score: 0.8124 - val_loss: 24.4023 - val_r2_score: 0.8275 Epoch 25/25 151/151 ━━━━━━━━━━━━━━━━━━━━ 5s 23ms/step - loss: 23.8121 - r2_score: 0.8134 - val_loss: 24.2764 - val_r2_score: 0.8284
# Wall-clock training time for this run.
print("Time taken in seconds ",end-start)
Time taken in seconds 121.4993999004364
plot(history,'loss')
plot(history,'r2_score')
# Append row 2; 'SGD' labels the mini-batch configuration.
results.loc[2]=['-','-','-',epochs,batch_size,'SGD',(end-start),history.history["loss"][-1],history.history["val_loss"][-1],history.history["r2_score"][-1],history.history["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
| 2 | - | - | - | 25 | 32 | SGD | 121.499400 | 25.865023 | 24.276419 | 0.795787 | 0.828393 |
# ---- Model 3: linear model with mini-batch SGD, larger batch size (64) ----
# clears the current Keras session, resetting all layers and models previously created, freeing up memory and resources.
tf.keras.backend.clear_session()
#Initializing the neural network
model = Sequential()
model.add(Dense(1,input_dim=x_train.shape[1]))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 1) │ 285 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 285 (1.11 KB)
Trainable params: 285 (1.11 KB)
Non-trainable params: 0 (0.00 B)
optimizer = keras.optimizers.SGD() # defining SGD as the optimizer to be used
model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=metrics,run_eagerly=True)
epochs = 25
# Doubling the batch size halves the number of updates per epoch vs. 32.
batch_size = 64
start = time.time()
history = model.fit(x_train, y_train, validation_data=(x_val,y_val) , batch_size=batch_size, epochs=epochs)
end=time.time()
Epoch 1/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 31ms/step - loss: 111.9825 - r2_score: 0.3936 - val_loss: 38.6424 - val_r2_score: 0.7268 Epoch 2/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 25ms/step - loss: 36.7211 - r2_score: 0.7141 - val_loss: 35.5817 - val_r2_score: 0.7485 Epoch 3/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 23ms/step - loss: 34.1328 - r2_score: 0.7338 - val_loss: 34.1166 - val_r2_score: 0.7588 Epoch 4/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 24ms/step - loss: 32.7173 - r2_score: 0.7448 - val_loss: 33.0035 - val_r2_score: 0.7667 Epoch 5/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 24ms/step - loss: 31.6776 - r2_score: 0.7529 - val_loss: 32.1211 - val_r2_score: 0.7729 Epoch 6/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 29ms/step - loss: 30.8699 - r2_score: 0.7592 - val_loss: 31.4058 - val_r2_score: 0.7780 Epoch 7/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 30.2217 - r2_score: 0.7642 - val_loss: 30.8142 - val_r2_score: 0.7822 Epoch 8/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 23ms/step - loss: 29.6879 - r2_score: 0.7684 - val_loss: 30.3152 - val_r2_score: 0.7857 Epoch 9/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 23ms/step - loss: 29.2380 - r2_score: 0.7718 - val_loss: 29.8866 - val_r2_score: 0.7887 Epoch 10/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 23ms/step - loss: 28.8512 - r2_score: 0.7748 - val_loss: 29.5122 - val_r2_score: 0.7914 Epoch 11/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 23ms/step - loss: 28.5128 - r2_score: 0.7775 - val_loss: 29.1803 - val_r2_score: 0.7937 Epoch 12/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 28.2123 - r2_score: 0.7798 - val_loss: 28.8820 - val_r2_score: 0.7958 Epoch 13/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 24ms/step - loss: 27.9418 - r2_score: 0.7819 - val_loss: 28.6108 - val_r2_score: 0.7978 Epoch 14/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 25ms/step - loss: 27.6958 - r2_score: 0.7838 - val_loss: 28.3617 - val_r2_score: 0.7995 Epoch 15/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 32ms/step - loss: 27.4698 - r2_score: 0.7856 - val_loss: 28.1310 - val_r2_score: 0.8011 Epoch 16/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 23ms/step - 
loss: 27.2606 - r2_score: 0.7872 - val_loss: 27.9158 - val_r2_score: 0.8027 Epoch 17/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 30ms/step - loss: 27.0657 - r2_score: 0.7887 - val_loss: 27.7139 - val_r2_score: 0.8041 Epoch 18/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 26.8830 - r2_score: 0.7901 - val_loss: 27.5234 - val_r2_score: 0.8054 Epoch 19/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 24ms/step - loss: 26.7110 - r2_score: 0.7915 - val_loss: 27.3431 - val_r2_score: 0.8067 Epoch 20/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 24ms/step - loss: 26.5485 - r2_score: 0.7927 - val_loss: 27.1717 - val_r2_score: 0.8079 Epoch 21/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 23ms/step - loss: 26.3942 - r2_score: 0.7939 - val_loss: 27.0083 - val_r2_score: 0.8091 Epoch 22/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 23ms/step - loss: 26.2475 - r2_score: 0.7951 - val_loss: 26.8523 - val_r2_score: 0.8102 Epoch 23/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 29ms/step - loss: 26.1075 - r2_score: 0.7961 - val_loss: 26.7029 - val_r2_score: 0.8112 Epoch 24/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 25.9737 - r2_score: 0.7972 - val_loss: 26.5596 - val_r2_score: 0.8123 Epoch 25/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 24ms/step - loss: 25.8455 - r2_score: 0.7982 - val_loss: 26.4221 - val_r2_score: 0.8132
# Wall-clock training time for this run.
print("Time taken in seconds ",end-start)
Time taken in seconds 57.31793189048767
plot(history,'loss')
plot(history,'r2_score')
# Append row 3 with this run's final-epoch metrics.
results.loc[3]=['-','-','-',epochs,batch_size,'SGD',(end-start),history.history["loss"][-1],history.history["val_loss"][-1],history.history["r2_score"][-1],history.history["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
| 2 | - | - | - | 25 | 32 | SGD | 121.499400 | 25.865023 | 24.276419 | 0.795787 | 0.828393 |
| 3 | - | - | - | 25 | 64 | SGD | 57.317932 | 27.897743 | 26.422087 | 0.779738 | 0.813226 |
# ---- Model 4: one hidden layer of 128 units with sigmoid activation ----
# clears the current Keras session, resetting all layers and models previously created, freeing up memory and resources.
tf.keras.backend.clear_session()
#Initializing the neural network
model = Sequential()
# Hidden layer; sigmoid squashes activations to (0, 1).
model.add(Dense(128,activation="sigmoid",input_dim=x_train.shape[1]))
# Linear output unit for regression.
model.add(Dense(1))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 128) │ 36,480 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 129 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 36,609 (143.00 KB)
Trainable params: 36,609 (143.00 KB)
Non-trainable params: 0 (0.00 B)
optimizer = keras.optimizers.SGD() # defining SGD as the optimizer to be used
model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=metrics,run_eagerly=True)
# Same training budget as the previous run so only the architecture differs.
epochs = 25
batch_size = 64
start = time.time()
history = model.fit(x_train, y_train, validation_data=(x_val,y_val) , batch_size=batch_size, epochs=epochs)
end=time.time()
Epoch 1/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 29ms/step - loss: 100.2605 - r2_score: 0.4408 - val_loss: 36.9329 - val_r2_score: 0.7389 Epoch 2/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 36.0253 - r2_score: 0.7199 - val_loss: 33.1919 - val_r2_score: 0.7654 Epoch 3/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - loss: 31.8483 - r2_score: 0.7519 - val_loss: 30.7305 - val_r2_score: 0.7828 Epoch 4/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 30ms/step - loss: 29.2322 - r2_score: 0.7721 - val_loss: 28.7463 - val_r2_score: 0.7968 Epoch 5/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 29ms/step - loss: 27.2142 - r2_score: 0.7876 - val_loss: 27.1094 - val_r2_score: 0.8084 Epoch 6/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 25.5121 - r2_score: 0.8008 - val_loss: 25.6944 - val_r2_score: 0.8184 Epoch 7/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - loss: 23.9985 - r2_score: 0.8125 - val_loss: 24.4229 - val_r2_score: 0.8274 Epoch 8/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 28ms/step - loss: 22.6255 - r2_score: 0.8231 - val_loss: 23.2575 - val_r2_score: 0.8356 Epoch 9/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 21.3786 - r2_score: 0.8328 - val_loss: 22.1796 - val_r2_score: 0.8432 Epoch 10/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 27ms/step - loss: 20.2531 - r2_score: 0.8415 - val_loss: 21.1771 - val_r2_score: 0.8503 Epoch 11/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 19.2443 - r2_score: 0.8494 - val_loss: 20.2422 - val_r2_score: 0.8569 Epoch 12/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - loss: 18.3453 - r2_score: 0.8563 - val_loss: 19.3709 - val_r2_score: 0.8631 Epoch 13/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 29ms/step - loss: 17.5467 - r2_score: 0.8626 - val_loss: 18.5624 - val_r2_score: 0.8688 Epoch 14/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - loss: 16.8377 - r2_score: 0.8681 - val_loss: 17.8170 - val_r2_score: 0.8741 Epoch 15/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 16.2078 - r2_score: 0.8730 - val_loss: 17.1348 - val_r2_score: 0.8789 Epoch 16/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 41ms/step - 
loss: 15.6470 - r2_score: 0.8774 - val_loss: 16.5148 - val_r2_score: 0.8833 Epoch 17/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 15.1466 - r2_score: 0.8813 - val_loss: 15.9552 - val_r2_score: 0.8872 Epoch 18/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 14.6988 - r2_score: 0.8848 - val_loss: 15.4533 - val_r2_score: 0.8908 Epoch 19/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 14.2969 - r2_score: 0.8879 - val_loss: 15.0056 - val_r2_score: 0.8939 Epoch 20/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 31ms/step - loss: 13.9348 - r2_score: 0.8908 - val_loss: 14.6080 - val_r2_score: 0.8967 Epoch 21/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 38ms/step - loss: 13.6071 - r2_score: 0.8934 - val_loss: 14.2554 - val_r2_score: 0.8992 Epoch 22/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 29ms/step - loss: 13.3093 - r2_score: 0.8957 - val_loss: 13.9424 - val_r2_score: 0.9014 Epoch 23/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 13.0374 - r2_score: 0.8979 - val_loss: 13.6638 - val_r2_score: 0.9034 Epoch 24/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 12.7880 - r2_score: 0.8998 - val_loss: 13.4147 - val_r2_score: 0.9052 Epoch 25/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 39ms/step - loss: 12.5583 - r2_score: 0.9016 - val_loss: 13.1910 - val_r2_score: 0.9068
# Wall-clock training time for this run.
print("Time taken in seconds ",end-start)
Time taken in seconds 74.1546471118927
plot(history,'loss')
plot(history,'r2_score')
# Append row 4: 1 hidden layer, 128 neurons, sigmoid activation.
results.loc[4]=[1,128,'sigmoid',epochs,batch_size,'SGD',(end-start),history.history["loss"][-1],history.history["val_loss"][-1],history.history["r2_score"][-1],history.history["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
| 2 | - | - | - | 25 | 32 | SGD | 121.499400 | 25.865023 | 24.276419 | 0.795787 | 0.828393 |
| 3 | - | - | - | 25 | 64 | SGD | 57.317932 | 27.897743 | 26.422087 | 0.779738 | 0.813226 |
| 4 | 1 | 128 | sigmoid | 25 | 64 | SGD | 74.154647 | 13.616706 | 13.191008 | 0.892491 | 0.906755 |
# ---- Model 5: one hidden layer of 128 units with tanh activation ----
# clears the current Keras session, resetting all layers and models previously created, freeing up memory and resources.
tf.keras.backend.clear_session()
#Initializing the neural network
model = Sequential()
# Hidden layer; tanh is zero-centered with outputs in (-1, 1).
model.add(Dense(128,activation="tanh",input_dim=x_train.shape[1]))
# Linear output unit for regression.
model.add(Dense(1))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 128) │ 36,480 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 129 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 36,609 (143.00 KB)
Trainable params: 36,609 (143.00 KB)
Non-trainable params: 0 (0.00 B)
optimizer = keras.optimizers.SGD() # defining SGD as the optimizer to be used
model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=metrics,run_eagerly=True)
# Same training budget; only the hidden-layer activation changes.
epochs = 25
batch_size = 64
start = time.time()
history = model.fit(x_train, y_train, validation_data=(x_val,y_val) , batch_size=batch_size, epochs=epochs)
end=time.time()
Epoch 1/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 40ms/step - loss: 79.5473 - r2_score: 0.5943 - val_loss: 35.1616 - val_r2_score: 0.7514 Epoch 2/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 30.2468 - r2_score: 0.7651 - val_loss: 27.9162 - val_r2_score: 0.8027 Epoch 3/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 36ms/step - loss: 25.7403 - r2_score: 0.7997 - val_loss: 23.4933 - val_r2_score: 0.8339 Epoch 4/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 22.1494 - r2_score: 0.8272 - val_loss: 21.4471 - val_r2_score: 0.8484 Epoch 5/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 29ms/step - loss: 19.6231 - r2_score: 0.8466 - val_loss: 19.7989 - val_r2_score: 0.8600 Epoch 6/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 17.8089 - r2_score: 0.8607 - val_loss: 18.0529 - val_r2_score: 0.8724 Epoch 7/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 31ms/step - loss: 16.4196 - r2_score: 0.8715 - val_loss: 16.4278 - val_r2_score: 0.8839 Epoch 8/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 38ms/step - loss: 15.3019 - r2_score: 0.8802 - val_loss: 15.0046 - val_r2_score: 0.8939 Epoch 9/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 14.3681 - r2_score: 0.8874 - val_loss: 13.8965 - val_r2_score: 0.9018 Epoch 10/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 13.5662 - r2_score: 0.8936 - val_loss: 13.0059 - val_r2_score: 0.9081 Epoch 11/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 12.8818 - r2_score: 0.8989 - val_loss: 12.3403 - val_r2_score: 0.9128 Epoch 12/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 12.2779 - r2_score: 0.9036 - val_loss: 11.8500 - val_r2_score: 0.9162 Epoch 13/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 41ms/step - loss: 11.7520 - r2_score: 0.9077 - val_loss: 11.4965 - val_r2_score: 0.9187 Epoch 14/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 11.2884 - r2_score: 0.9113 - val_loss: 11.2293 - val_r2_score: 0.9206 Epoch 15/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 27ms/step - loss: 10.8749 - r2_score: 0.9146 - val_loss: 11.0060 - val_r2_score: 0.9222 Epoch 16/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - 
loss: 10.5038 - r2_score: 0.9175 - val_loss: 10.8015 - val_r2_score: 0.9236 Epoch 17/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - loss: 10.1706 - r2_score: 0.9201 - val_loss: 10.6096 - val_r2_score: 0.9250 Epoch 18/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 28ms/step - loss: 9.8682 - r2_score: 0.9225 - val_loss: 10.4366 - val_r2_score: 0.9262 Epoch 19/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 9.5903 - r2_score: 0.9247 - val_loss: 10.2879 - val_r2_score: 0.9273 Epoch 20/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 9.3318 - r2_score: 0.9267 - val_loss: 10.1541 - val_r2_score: 0.9282 Epoch 21/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 37ms/step - loss: 9.0828 - r2_score: 0.9287 - val_loss: 10.0277 - val_r2_score: 0.9291 Epoch 22/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 8.8366 - r2_score: 0.9306 - val_loss: 9.9145 - val_r2_score: 0.9299 Epoch 23/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 8.5971 - r2_score: 0.9325 - val_loss: 9.8223 - val_r2_score: 0.9306 Epoch 24/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 8.3698 - r2_score: 0.9343 - val_loss: 9.7551 - val_r2_score: 0.9310 Epoch 25/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - loss: 8.1562 - r2_score: 0.9360 - val_loss: 9.7151 - val_r2_score: 0.9313
# Wall-clock training time for this run.
print("Time taken in seconds ",end-start)
Time taken in seconds 78.1450309753418
plot(history,'loss')
plot(history,'r2_score')
# Append row 5: 1 hidden layer, 128 neurons, tanh activation.
results.loc[5]=[1,128,'tanh',epochs,batch_size,'SGD',(end-start),history.history["loss"][-1],history.history["val_loss"][-1],history.history["r2_score"][-1],history.history["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
| 2 | - | - | - | 25 | 32 | SGD | 121.499400 | 25.865023 | 24.276419 | 0.795787 | 0.828393 |
| 3 | - | - | - | 25 | 64 | SGD | 57.317932 | 27.897743 | 26.422087 | 0.779738 | 0.813226 |
| 4 | 1 | 128 | sigmoid | 25 | 64 | SGD | 74.154647 | 13.616706 | 13.191008 | 0.892491 | 0.906755 |
| 5 | 1 | 128 | tanh | 25 | 64 | SGD | 78.145031 | 8.859550 | 9.715087 | 0.930051 | 0.931325 |
# ---- Model 6: one hidden layer of 128 units with ReLU activation ----
# clears the current Keras session, resetting all layers and models previously created, freeing up memory and resources.
tf.keras.backend.clear_session()
#Initializing the neural network
model = Sequential()
# Hidden layer; ReLU passes positive activations through unchanged.
model.add(Dense(128,activation="relu",input_dim=x_train.shape[1]))
# Linear output unit for regression.
model.add(Dense(1))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 128) │ 36,480 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 129 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 36,609 (143.00 KB)
Trainable params: 36,609 (143.00 KB)
Non-trainable params: 0 (0.00 B)
# Plain SGD minimizing MSE; run_eagerly eases debugging at some speed cost.
optimizer = keras.optimizers.SGD()
model.compile(loss="mean_squared_error", optimizer=optimizer,
              metrics=metrics, run_eagerly=True)

epochs, batch_size = 25, 64

# Time the training run for the comparison table.
start = time.time()
history = model.fit(x_train, y_train,
                    validation_data=(x_val, y_val),
                    batch_size=batch_size, epochs=epochs)
end = time.time()
Epoch 1/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 30ms/step - loss: 78.1409 - r2_score: 0.6149 - val_loss: 22.6298 - val_r2_score: 0.8400 Epoch 2/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 24.4104 - r2_score: 0.8088 - val_loss: 20.3330 - val_r2_score: 0.8563 Epoch 3/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 20.8962 - r2_score: 0.8361 - val_loss: 20.2712 - val_r2_score: 0.8567 Epoch 4/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 40ms/step - loss: 18.9292 - r2_score: 0.8516 - val_loss: 19.0919 - val_r2_score: 0.8650 Epoch 5/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 29ms/step - loss: 17.5839 - r2_score: 0.8622 - val_loss: 18.7435 - val_r2_score: 0.8675 Epoch 6/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 31ms/step - loss: 16.6700 - r2_score: 0.8696 - val_loss: 18.1998 - val_r2_score: 0.8713 Epoch 7/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 15.9100 - r2_score: 0.8757 - val_loss: 18.0499 - val_r2_score: 0.8724 Epoch 8/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 36ms/step - loss: 15.2037 - r2_score: 0.8814 - val_loss: 17.2522 - val_r2_score: 0.8780 Epoch 9/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 41ms/step - loss: 14.6969 - r2_score: 0.8855 - val_loss: 16.7128 - val_r2_score: 0.8819 Epoch 10/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 14.1779 - r2_score: 0.8896 - val_loss: 16.0255 - val_r2_score: 0.8867 Epoch 11/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 13.7966 - r2_score: 0.8926 - val_loss: 16.2979 - val_r2_score: 0.8848 Epoch 12/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 36ms/step - loss: 13.3378 - r2_score: 0.8964 - val_loss: 15.2482 - val_r2_score: 0.8922 Epoch 13/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - loss: 13.0258 - r2_score: 0.8988 - val_loss: 15.3595 - val_r2_score: 0.8914 Epoch 14/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 30ms/step - loss: 12.6184 - r2_score: 0.9021 - val_loss: 14.2531 - val_r2_score: 0.8992 Epoch 15/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 12.3351 - r2_score: 0.9042 - val_loss: 14.5103 - val_r2_score: 0.8974 Epoch 16/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - 
loss: 11.9488 - r2_score: 0.9074 - val_loss: 14.1340 - val_r2_score: 0.9001 Epoch 17/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 30ms/step - loss: 11.7498 - r2_score: 0.9089 - val_loss: 15.1942 - val_r2_score: 0.8926 Epoch 18/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 11.6430 - r2_score: 0.9100 - val_loss: 14.6469 - val_r2_score: 0.8965 Epoch 19/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 11.2995 - r2_score: 0.9126 - val_loss: 14.3911 - val_r2_score: 0.8983 Epoch 20/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 32ms/step - loss: 10.9995 - r2_score: 0.9149 - val_loss: 15.4576 - val_r2_score: 0.8907 Epoch 21/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 37ms/step - loss: 11.0765 - r2_score: 0.9145 - val_loss: 13.6486 - val_r2_score: 0.9035 Epoch 22/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 10.4216 - r2_score: 0.9194 - val_loss: 14.7097 - val_r2_score: 0.8960 Epoch 23/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 27ms/step - loss: 10.3984 - r2_score: 0.9196 - val_loss: 14.1583 - val_r2_score: 0.8999 Epoch 24/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 10.1541 - r2_score: 0.9215 - val_loss: 14.2825 - val_r2_score: 0.8990 Epoch 25/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 40ms/step - loss: 9.9332 - r2_score: 0.9232 - val_loss: 14.3455 - val_r2_score: 0.8986
# Report wall-clock training time for the ReLU experiment.
elapsed = end - start
print("Time taken in seconds ", elapsed)
Time taken in seconds 78.64779210090637
# Visualize the learning curves for the ReLU model.
plot(history, 'loss')
plot(history, 'r2_score')

# Log experiment 6 (1 hidden layer, 128 ReLU units, SGD) in the comparison table.
hist = history.history
results.loc[6] = [1, 128, 'relu', epochs, batch_size, 'SGD', (end - start),
                  hist["loss"][-1], hist["val_loss"][-1],
                  hist["r2_score"][-1], hist["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
| 2 | - | - | - | 25 | 32 | SGD | 121.499400 | 25.865023 | 24.276419 | 0.795787 | 0.828393 |
| 3 | - | - | - | 25 | 64 | SGD | 57.317932 | 27.897743 | 26.422087 | 0.779738 | 0.813226 |
| 4 | 1 | 128 | sigmoid | 25 | 64 | SGD | 74.154647 | 13.616706 | 13.191008 | 0.892491 | 0.906755 |
| 5 | 1 | 128 | tanh | 25 | 64 | SGD | 78.145031 | 8.859550 | 9.715087 | 0.930051 | 0.931325 |
| 6 | 1 | 128 | relu | 25 | 64 | SGD | 78.647792 | 9.730357 | 14.345458 | 0.923175 | 0.898594 |
# Reset the Keras session so layer names and weights start fresh for this run.
tf.keras.backend.clear_session()

# Two hidden layers (128 -> 32, both ReLU) with a single linear output.
model = Sequential([
    Dense(128, activation="relu", input_dim=x_train.shape[1]),
    Dense(32, activation="relu"),
    Dense(1),
])
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 128) │ 36,480 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 32) │ 4,128 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_2 (Dense) │ (None, 1) │ 33 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 40,641 (158.75 KB)
Trainable params: 40,641 (158.75 KB)
Non-trainable params: 0 (0.00 B)
# Same training setup as before: SGD on MSE, eager execution for debuggability.
optimizer = keras.optimizers.SGD()
model.compile(loss="mean_squared_error", optimizer=optimizer,
              metrics=metrics, run_eagerly=True)

epochs, batch_size = 25, 64

# Time the training run for the comparison table.
start = time.time()
history = model.fit(x_train, y_train,
                    validation_data=(x_val, y_val),
                    batch_size=batch_size, epochs=epochs)
end = time.time()
Epoch 1/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 36ms/step - loss: 119.0312 - r2_score: 0.3395 - val_loss: 174.7490 - val_r2_score: -0.2353 Epoch 2/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 141.2652 - r2_score: -0.0883 - val_loss: 125.1895 - val_r2_score: 0.1151 Epoch 3/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 36ms/step - loss: 103.3704 - r2_score: 0.2047 - val_loss: 92.6942 - val_r2_score: 0.3448 Epoch 4/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 33ms/step - loss: 80.2653 - r2_score: 0.3824 - val_loss: 38.2387 - val_r2_score: 0.7297 Epoch 5/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 32ms/step - loss: 51.9535 - r2_score: 0.5936 - val_loss: 71.4642 - val_r2_score: 0.4948 Epoch 6/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 43ms/step - loss: 74.8095 - r2_score: 0.4208 - val_loss: 64.5067 - val_r2_score: 0.5440 Epoch 7/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 37ms/step - loss: 72.0067 - r2_score: 0.4423 - val_loss: 49.9014 - val_r2_score: 0.6473 Epoch 8/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 34ms/step - loss: 58.6729 - r2_score: 0.5446 - val_loss: 59.2428 - val_r2_score: 0.5812 Epoch 9/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 6s 42ms/step - loss: 62.5613 - r2_score: 0.5163 - val_loss: 57.4270 - val_r2_score: 0.5941 Epoch 10/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 36ms/step - loss: 63.6383 - r2_score: 0.5072 - val_loss: 57.5613 - val_r2_score: 0.5931 Epoch 11/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 57.4395 - r2_score: 0.5558 - val_loss: 47.6287 - val_r2_score: 0.6633 Epoch 12/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 48.6959 - r2_score: 0.6236 - val_loss: 41.1300 - val_r2_score: 0.7093 Epoch 13/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 32ms/step - loss: 42.1436 - r2_score: 0.6749 - val_loss: 37.0572 - val_r2_score: 0.7380 Epoch 14/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 6s 76ms/step - loss: 39.6427 - r2_score: 0.6938 - val_loss: 33.9403 - val_r2_score: 0.7601 Epoch 15/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 8s 103ms/step - loss: 36.8757 - r2_score: 0.7150 - val_loss: 31.1051 - val_r2_score: 0.7801 Epoch 16/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 
33ms/step - loss: 34.6752 - r2_score: 0.7318 - val_loss: 29.2155 - val_r2_score: 0.7935 Epoch 17/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 32ms/step - loss: 32.7577 - r2_score: 0.7465 - val_loss: 27.1382 - val_r2_score: 0.8082 Epoch 18/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 44ms/step - loss: 31.6345 - r2_score: 0.7547 - val_loss: 25.5313 - val_r2_score: 0.8195 Epoch 19/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - loss: 30.8265 - r2_score: 0.7608 - val_loss: 27.5814 - val_r2_score: 0.8050 Epoch 20/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 34ms/step - loss: 27.6467 - r2_score: 0.7862 - val_loss: 24.0735 - val_r2_score: 0.8298 Epoch 21/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 6s 47ms/step - loss: 29.5538 - r2_score: 0.7706 - val_loss: 23.3580 - val_r2_score: 0.8349 Epoch 22/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 34ms/step - loss: 26.2073 - r2_score: 0.7966 - val_loss: 21.6184 - val_r2_score: 0.8472 Epoch 23/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - loss: 24.5601 - r2_score: 0.8095 - val_loss: 21.9547 - val_r2_score: 0.8448 Epoch 24/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 6s 42ms/step - loss: 24.8704 - r2_score: 0.8068 - val_loss: 22.1077 - val_r2_score: 0.8437 Epoch 25/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 37ms/step - loss: 23.6243 - r2_score: 0.8167 - val_loss: 19.5699 - val_r2_score: 0.8617
# Report wall-clock training time for the two-hidden-layer experiment.
elapsed = end - start
print("Time taken in seconds ", elapsed)
Time taken in seconds 99.04969620704651
# Visualize the learning curves for the two-hidden-layer model.
plot(history, 'loss')
plot(history, 'r2_score')

# Log experiment 7 (2 hidden layers, 128 & 32 ReLU units, SGD) in the table.
hist = history.history
results.loc[7] = [2, [128, 32], ['relu', 'relu'], epochs, batch_size, 'SGD',
                  (end - start),
                  hist["loss"][-1], hist["val_loss"][-1],
                  hist["r2_score"][-1], hist["val_r2_score"][-1]]
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
| 2 | - | - | - | 25 | 32 | SGD | 121.499400 | 25.865023 | 24.276419 | 0.795787 | 0.828393 |
| 3 | - | - | - | 25 | 64 | SGD | 57.317932 | 27.897743 | 26.422087 | 0.779738 | 0.813226 |
| 4 | 1 | 128 | sigmoid | 25 | 64 | SGD | 74.154647 | 13.616706 | 13.191008 | 0.892491 | 0.906755 |
| 5 | 1 | 128 | tanh | 25 | 64 | SGD | 78.145031 | 8.859550 | 9.715087 | 0.930051 | 0.931325 |
| 6 | 1 | 128 | relu | 25 | 64 | SGD | 78.647792 | 9.730357 | 14.345458 | 0.923175 | 0.898594 |
| 7 | 2 | [128, 32] | [relu, relu] | 25 | 64 | SGD | 99.049696 | 26.398695 | 19.569878 | 0.791573 | 0.861663 |
# Display the full experiment comparison table one more time before
# choosing the final model configuration.
results
| # hidden layers | # neurons - hidden layer | activation function - hidden layer | # epochs | batch size | optimizer | time(secs) | Train_loss | Valid_loss | Train_R-squared | Valid_R-squared | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | - | - | - | 10 | 4814 | GD | 1.697181 | 116.842415 | 127.022896 | 0.077489 | 0.102093 |
| 1 | - | - | - | 25 | 4814 | GD | 3.839952 | 62.466640 | 66.866333 | 0.506804 | 0.527331 |
| 2 | - | - | - | 25 | 32 | SGD | 121.499400 | 25.865023 | 24.276419 | 0.795787 | 0.828393 |
| 3 | - | - | - | 25 | 64 | SGD | 57.317932 | 27.897743 | 26.422087 | 0.779738 | 0.813226 |
| 4 | 1 | 128 | sigmoid | 25 | 64 | SGD | 74.154647 | 13.616706 | 13.191008 | 0.892491 | 0.906755 |
| 5 | 1 | 128 | tanh | 25 | 64 | SGD | 78.145031 | 8.859550 | 9.715087 | 0.930051 | 0.931325 |
| 6 | 1 | 128 | relu | 25 | 64 | SGD | 78.647792 | 9.730357 | 14.345458 | 0.923175 | 0.898594 |
| 7 | 2 | [128, 32] | [relu, relu] | 25 | 64 | SGD | 99.049696 | 26.398695 | 19.569878 | 0.791573 | 0.861663 |
Among all the models, the single-hidden-layer configurations achieved the highest training and validation scores.
A training R2 score of ~93% and a validation R2 score of ~93% suggest that the model generalizes well, performing about as strongly on the validation data as on the training data.
We'll go ahead with this model as our final model. (Note: per the table above, the tanh variant in row 5 has the highest validation R2 of 0.9313, while the rebuild below uses ReLU as in row 6 — verify the intended activation.)
Let's rebuild it and check its performance across multiple metrics
# Reset the Keras session so the final model starts from fresh weights.
tf.keras.backend.clear_session()

# Rebuild the chosen architecture: one hidden layer of 128 ReLU units
# and a single linear output unit for price regression.
model = Sequential([
    Dense(128, activation="relu", input_dim=x_train.shape[1]),
    Dense(1),
])
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 128) │ 36,480 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 129 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 36,609 (143.00 KB)
Trainable params: 36,609 (143.00 KB)
Non-trainable params: 0 (0.00 B)
# Final training run with the same configuration as experiment 6.
optimizer = keras.optimizers.SGD()
model.compile(loss="mean_squared_error", optimizer=optimizer,
              metrics=metrics, run_eagerly=True)

epochs, batch_size = 25, 64

# NOTE(review): the TEST split is passed as validation_data here, which leaks
# test information into training-time monitoring — consider (x_val, y_val).
history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    batch_size=batch_size, epochs=epochs)
Epoch 1/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 31ms/step - loss: 78.9293 - r2_score: 0.5921 - val_loss: 15.2803 - val_r2_score: 0.8406 Epoch 2/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 29ms/step - loss: 24.1295 - r2_score: 0.8110 - val_loss: 13.2398 - val_r2_score: 0.8619 Epoch 3/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - loss: 20.7745 - r2_score: 0.8370 - val_loss: 12.4986 - val_r2_score: 0.8696 Epoch 4/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 28ms/step - loss: 18.8243 - r2_score: 0.8524 - val_loss: 12.0415 - val_r2_score: 0.8744 Epoch 5/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 17.5857 - r2_score: 0.8623 - val_loss: 11.4448 - val_r2_score: 0.8806 Epoch 6/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 31ms/step - loss: 16.5880 - r2_score: 0.8702 - val_loss: 11.1857 - val_r2_score: 0.8833 Epoch 7/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 31ms/step - loss: 15.7657 - r2_score: 0.8769 - val_loss: 10.9638 - val_r2_score: 0.8856 Epoch 8/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 41ms/step - loss: 15.0834 - r2_score: 0.8824 - val_loss: 10.9710 - val_r2_score: 0.8855 Epoch 9/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 14.5815 - r2_score: 0.8865 - val_loss: 10.3246 - val_r2_score: 0.8923 Epoch 10/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 14.1321 - r2_score: 0.8901 - val_loss: 10.7137 - val_r2_score: 0.8882 Epoch 11/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - loss: 13.6907 - r2_score: 0.8937 - val_loss: 10.4253 - val_r2_score: 0.8912 Epoch 12/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 37ms/step - loss: 13.3379 - r2_score: 0.8965 - val_loss: 9.9992 - val_r2_score: 0.8957 Epoch 13/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - loss: 12.9711 - r2_score: 0.8994 - val_loss: 9.8395 - val_r2_score: 0.8973 Epoch 14/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 31ms/step - loss: 12.6858 - r2_score: 0.9017 - val_loss: 9.5834 - val_r2_score: 0.9000 Epoch 15/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 29ms/step - loss: 12.3908 - r2_score: 0.9040 - val_loss: 9.1654 - val_r2_score: 0.9044 Epoch 16/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - loss: 
11.9403 - r2_score: 0.9074 - val_loss: 8.9765 - val_r2_score: 0.9063 Epoch 17/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 40ms/step - loss: 11.7436 - r2_score: 0.9090 - val_loss: 8.9314 - val_r2_score: 0.9068 Epoch 18/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 11.4443 - r2_score: 0.9113 - val_loss: 8.7917 - val_r2_score: 0.9083 Epoch 19/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - loss: 11.2153 - r2_score: 0.9132 - val_loss: 8.5586 - val_r2_score: 0.9107 Epoch 20/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 28ms/step - loss: 10.9767 - r2_score: 0.9150 - val_loss: 8.3914 - val_r2_score: 0.9124 Epoch 21/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - loss: 10.6837 - r2_score: 0.9173 - val_loss: 7.9760 - val_r2_score: 0.9168 Epoch 22/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 28ms/step - loss: 10.5235 - r2_score: 0.9184 - val_loss: 8.4672 - val_r2_score: 0.9116 Epoch 23/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - loss: 10.2252 - r2_score: 0.9209 - val_loss: 8.0389 - val_r2_score: 0.9161 Epoch 24/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 5s 30ms/step - loss: 10.1904 - r2_score: 0.9211 - val_loss: 7.7792 - val_r2_score: 0.9188 Epoch 25/25 76/76 ━━━━━━━━━━━━━━━━━━━━ 2s 30ms/step - loss: 9.8304 - r2_score: 0.9238 - val_loss: 7.7395 - val_r2_score: 0.9192
# Evaluate the final model on the training split using the model_performance
# helper defined earlier (returns RMSE, MAE, R-squared, Adj. R-squared, MAPE).
train_perf = model_performance(model,x_train,y_train)
print("Train performance")
pd.DataFrame(train_perf)
151/151 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step Train performance
| RMSE | MAE | R-squared | Adj. R-squared | MAPE | |
|---|---|---|---|---|---|
| 0 | 3.290241 | 1.562554 | 0.914527 | 0.909168 | 19.063764 |
# Sanity check: count missing values per feature column in the validation set
# before scoring the model on it.
x_val.isnull().sum()
| 0 | |
|---|---|
| Kilometers_Driven | 0 |
| Seats | 0 |
| New_Price | 0 |
| mileage_num | 0 |
| engine_num | 0 |
| ... | ... |
| Model_xylo | 0 |
| Model_yeti | 0 |
| Model_z4 | 0 |
| Model_zen | 0 |
| Model_zest | 0 |
284 rows × 1 columns
# Sanity check: confirm the validation target has no missing values.
y_val.isnull().sum()
0
# Evaluate the final model on the validation split using the same
# model_performance helper (RMSE, MAE, R-squared, Adj. R-squared, MAPE).
valid_perf = model_performance(model,x_val,y_val)
print("Validation data performance")
pd.DataFrame(valid_perf)
19/19 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step Validation data performance
| RMSE | MAE | R-squared | Adj. R-squared | MAPE | |
|---|---|---|---|---|---|
| 0 | 3.569511 | 1.882157 | 0.909933 | 0.829242 | 19.889133 |
# Evaluate the final model on the held-out test split.
# NOTE(review): the test set was also used as validation_data during the final
# fit above, so this is not a fully unseen-data estimate — verify intent.
test_perf = model_performance(model,x_test,y_test)
print("Test performance")
pd.DataFrame(test_perf)
19/19 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step Test performance
| RMSE | MAE | R-squared | Adj. R-squared | MAPE | |
|---|---|---|---|---|---|
| 0 | 2.781989 | 1.501513 | 0.919244 | 0.846894 | 18.725387 |
The model has an $R^2$ of ~0.92 on the test set, which means it can explain ~92% of the variance in the unseen data
The RMSE value is ~2.8, meaning the model's predictions deviate from the actual price by about 2.8 units on average (in a root-mean-square sense)
The MAPE value is ~18%, which means the model's predictions are, on average, within ~18% of the actual price
# Notebook attribution.
print('Author: Oneil Carter')
Author: Oneil Carter
# Export this notebook to HTML via nbconvert (IPython shell magic).
# NOTE(review): per the warning below, this path matched no files — check the
# notebook's actual filename/casing in /content before relying on the export.
!jupyter nbconvert --to html "/content/Predict_Price_Of_USed_Cars_With_Neural_Networks.ipynb"
[NbConvertApp] WARNING | pattern '/content/Predict_Price_Of_USed_Cars_With_Neural_Networks.ipynb' matched no files
This application is used to convert notebook files (*.ipynb)
to various other formats.
WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES.
Options
=======
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
<cmd> --help-all
--debug
set log level to logging.DEBUG (maximize logging output)
Equivalent to: [--Application.log_level=10]
--show-config
Show the application's configuration (human-readable format)
Equivalent to: [--Application.show_config=True]
--show-config-json
Show the application's configuration (json format)
Equivalent to: [--Application.show_config_json=True]
--generate-config
generate default config file
Equivalent to: [--JupyterApp.generate_config=True]
-y
Answer yes to any questions instead of prompting.
Equivalent to: [--JupyterApp.answer_yes=True]
--execute
Execute the notebook prior to export.
Equivalent to: [--ExecutePreprocessor.enabled=True]
--allow-errors
Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.
Equivalent to: [--ExecutePreprocessor.allow_errors=True]
--stdin
read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'
Equivalent to: [--NbConvertApp.from_stdin=True]
--stdout
Write notebook output to stdout instead of files.
Equivalent to: [--NbConvertApp.writer_class=StdoutWriter]
--inplace
Run nbconvert in place, overwriting the existing notebook (only
relevant when converting to notebook format)
Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory=]
--clear-output
Clear output of current file and save in place,
overwriting the existing notebook.
Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory= --ClearOutputPreprocessor.enabled=True]
--no-prompt
Exclude input and output prompts from converted document.
Equivalent to: [--TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True]
--no-input
Exclude input cells and output prompts from converted document.
This mode is ideal for generating code-free reports.
Equivalent to: [--TemplateExporter.exclude_output_prompt=True --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True]
--allow-chromium-download
Whether to allow downloading chromium if no suitable version is found on the system.
Equivalent to: [--WebPDFExporter.allow_chromium_download=True]
--disable-chromium-sandbox
Disable chromium security sandbox when converting to PDF..
Equivalent to: [--WebPDFExporter.disable_sandbox=True]
--show-input
Shows code input. This flag is only useful for dejavu users.
Equivalent to: [--TemplateExporter.exclude_input=False]
--embed-images
Embed the images as base64 dataurls in the output. This flag is only useful for the HTML/WebPDF/Slides exports.
Equivalent to: [--HTMLExporter.embed_images=True]
--sanitize-html
Whether the HTML in Markdown cells and cell outputs should be sanitized..
Equivalent to: [--HTMLExporter.sanitize_html=True]
--log-level=<Enum>
Set the log level by value or name.
Choices: any of [0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL']
Default: 30
Equivalent to: [--Application.log_level]
--config=<Unicode>
Full path of a config file.
Default: ''
Equivalent to: [--JupyterApp.config_file]
--to=<Unicode>
The export format to be used, either one of the built-in formats
['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'rst', 'script', 'slides', 'webpdf']
or a dotted object name that represents the import path for an
``Exporter`` class
Default: ''
Equivalent to: [--NbConvertApp.export_format]
--template=<Unicode>
Name of the template to use
Default: ''
Equivalent to: [--TemplateExporter.template_name]
--template-file=<Unicode>
Name of the template file to use
Default: None
Equivalent to: [--TemplateExporter.template_file]
--theme=<Unicode>
Template specific theme(e.g. the name of a JupyterLab CSS theme distributed
as prebuilt extension for the lab template)
Default: 'light'
Equivalent to: [--HTMLExporter.theme]
--sanitize_html=<Bool>
Whether the HTML in Markdown cells and cell outputs should be sanitized.This
should be set to True by nbviewer or similar tools.
Default: False
Equivalent to: [--HTMLExporter.sanitize_html]
--writer=<DottedObjectName>
Writer class used to write the
results of the conversion
Default: 'FilesWriter'
Equivalent to: [--NbConvertApp.writer_class]
--post=<DottedOrNone>
PostProcessor class used to write the
results of the conversion
Default: ''
Equivalent to: [--NbConvertApp.postprocessor_class]
--output=<Unicode>
overwrite base name use for output files.
can only be used when converting one notebook at a time.
Default: ''
Equivalent to: [--NbConvertApp.output_base]
--output-dir=<Unicode>
Directory to write output(s) to. Defaults
to output to the directory of each notebook. To recover
previous default behaviour (outputting to the current
working directory) use . as the flag value.
Default: ''
Equivalent to: [--FilesWriter.build_directory]
--reveal-prefix=<Unicode>
The URL prefix for reveal.js (version 3.x).
This defaults to the reveal CDN, but can be any url pointing to a copy
of reveal.js.
For speaker notes to work, this must be a relative path to a local
copy of reveal.js: e.g., "reveal.js".
If a relative path is given, it must be a subdirectory of the
current directory (from which the server is run).
See the usage documentation
(https://nbconvert.readthedocs.io/en/latest/usage.html#reveal-js-html-slideshow)
for more details.
Default: ''
Equivalent to: [--SlidesExporter.reveal_url_prefix]
--nbformat=<Enum>
The nbformat version to write.
Use this to downgrade notebooks.
Choices: any of [1, 2, 3, 4]
Default: 4
Equivalent to: [--NotebookExporter.nbformat_version]
Examples
--------
The simplest way to use nbconvert is
> jupyter nbconvert mynotebook.ipynb --to html
Options include ['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'rst', 'script', 'slides', 'webpdf'].
> jupyter nbconvert --to latex mynotebook.ipynb
Both HTML and LaTeX support multiple output templates. LaTeX includes
'base', 'article' and 'report'. HTML includes 'basic', 'lab' and
'classic'. You can specify the flavor of the format used.
> jupyter nbconvert --to html --template lab mynotebook.ipynb
You can also pipe the output to stdout, rather than a file
> jupyter nbconvert mynotebook.ipynb --stdout
PDF is generated via latex
> jupyter nbconvert mynotebook.ipynb --to pdf
You can get (and serve) a Reveal.js-powered slideshow
> jupyter nbconvert myslides.ipynb --to slides --post serve
Multiple notebooks can be given at the command line in a couple of
different ways:
> jupyter nbconvert notebook*.ipynb
> jupyter nbconvert notebook1.ipynb notebook2.ipynb
or you can specify the notebooks list in a config file, containing::
c.NbConvertApp.notebooks = ["my_notebook.ipynb"]
> jupyter nbconvert --config mycfg.py
To see all available configurables, use `--help-all`.